Blob Blame History Raw
/* annobin - a clang plugin for annotating the output binary file.
   Copyright (c) 2019, 2020 Red Hat.
   Created by Nick Clifton and Serge Guelton.

  This is free software; you can redistribute it and/or modify it
  under the terms of the GNU General Public License as published
  by the Free Software Foundation; either version 3, or (at your
  option) any later version.

  It is distributed in the hope that it will be useful, but
  WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  GNU General Public License for more details.  */

#include "clang/Frontend/FrontendPluginRegistry.h"
#include "clang/AST/AST.h"
#include "clang/AST/ASTConsumer.h"
#include "clang/AST/RecursiveASTVisitor.h"
#include "clang/Frontend/CompilerInstance.h"
#include "clang/Lex/PreprocessorOptions.h"
#include "clang/Sema/SemaConsumer.h"
#include "llvm/Support/raw_ostream.h"
#include "clang/Basic/Version.h"
#include "clang/Basic/TargetInfo.h"

using namespace std;
using namespace clang;
using namespace llvm;

#include "annobin-global.h"
#include <cstring>
#include <cctype>
#include <cstdarg>
#include <sstream>

namespace
{
  static const unsigned int   annobin_version = ANNOBIN_VERSION;
  bool                        be_verbose = false;

  // Helper functions used throughout this file.
  template<class... Tys>
  char *
  concat (Tys const&... args)
  {
    std::ostringstream oss;

    (void) std::initializer_list<int>{((oss << args), 1)...};
    return strdup (oss.str().c_str());
  }

  static inline void
  inform (char const fmt[], ...)
  {
    va_list args;

    va_start (args, fmt);
    fflush (stdout);
    fprintf (stderr, "Annobin: ");
    vfprintf (stderr, fmt, args);
    fputc ('\n', stderr);
    va_end (args);
  }

  static inline void
  verbose (char const fmt[], ...)
  {
    if (! be_verbose)
      return;

    va_list args;

    va_start (args, fmt);
    fflush (stdout);
    fprintf (stderr, "Annobin: ");
    vfprintf (stderr, fmt, args);
    fputc ('\n', stderr);
    va_end (args);
  }

  static inline void
  ice (char const fmt[], ...)
  {
    va_list args;

    va_start (args, fmt);
    fflush (stdout);
    fprintf (stderr, "Annobin: Internal Error: ");
    vfprintf (stderr, fmt , args);
    fputc ('\n', stderr);
    va_end (args);
    exit (EXIT_FAILURE);
  }

  class AnnobinConsumer : public ASTConsumer
  {
private:
    CompilerInstance& CI;
    unsigned int      target_start_sym_bias = 0;
    bool              is_32bit = false;
    char const*       annobin_current_file_start = nullptr;
    char const*       annobin_current_file_end = nullptr;

  public:
    AnnobinConsumer (CompilerInstance & CI) : CI (CI)
    {
    }
    
    void
    HandleTranslationUnit (ASTContext & Context) override
    {
      static char buf [6400];  // FIXME: Use a dynmically allocated buffer.

      is_32bit = Context.getTargetInfo().getPointerWidth(0) == 32;

      SourceManager & src = Context.getSourceManager ();
      std::string filename = src.getFilename (src.getLocForStartOfFile (src.getMainFileID ())).str ().c_str ();

      convert_to_valid_symbol_name (filename);
      verbose ("Generate start and end symbols based on: %s", filename.c_str());
      annobin_current_file_start = concat ("_annobin_", filename, "_start");
      annobin_current_file_end   = concat ("_annobin_", filename, "_end");

      // Generate start and end symbols.
      //
      // Note - we put the end symbol in a section called .text.zzz.
      // The hope is that that this section will be the last section allocated
      // to the .text section when the final link is made.  In that way we can
      // ensure that the note range will be from wherever the start symbol below
      // ends up in the final image to the end of the .text section in that image.
      // This does mean however that if more than one compilation unit is
      // linked together then the note ranges will overlap.
      //
      // The benefit of this approach is that if the linker discards any text
      // sections (eg because garbage collection is enabled, or linkonce is being
      // used), the note ranges will still be valid and there will no gaps.
      //
      // FIXME: This scheme fails if the user creates code sections that do not
      // start with .text. or which sort alphabetically after .text.zz.
      static const char START_TEXT[] = "\
\t.pushsection .text\n\
\t.hidden %s\n\
\t.type   %s, STT_NOTYPE\n\
\t.equiv  %s, .text + %d\n\
\t.size   %s, 0\n\
\t.pushsection .text.zzz\n\
\t.hidden %s\n\
\t.type   %s, STT_NOTYPE\n\
\t.equiv  %s, .text.zzz\n\
\t.size   %s, 0\n\
\t.popsection\n";
      sprintf (buf, START_TEXT,
	       annobin_current_file_start, annobin_current_file_start, annobin_current_file_start,
	       target_start_sym_bias, annobin_current_file_start,
	       annobin_current_file_end, annobin_current_file_end, annobin_current_file_end, annobin_current_file_end);

      AddAsmText (Context, buf);

      sprintf (buf, "%d%c%d", SPEC_VERSION, ANNOBIN_TOOL_ID_CLANG, annobin_version);
      OutputStringNote (Context,
			GNU_BUILD_ATTRIBUTE_VERSION, buf,
			"version note");

      sprintf (buf, "running on %s", getClangFullVersion ().c_str ());
      OutputStringNote (Context, GNU_BUILD_ATTRIBUTE_TOOL,
			buf, "tool note (running on)");
			
      sprintf (buf, "annobin built by clang version %s", CLANG_VERSION_STRING);
      OutputStringNote (Context, GNU_BUILD_ATTRIBUTE_TOOL,
			buf, "tool note (plugin built by)");

      // FIXME: Since we are using documented clang API functions
      // we assume that a version mistmatch bewteen the plugin builder
      // and the plugin consumer does not matter.  Check this...

      CheckOptions (CI, Context);
    }

  private:

    void
    convert_to_valid_symbol_name (std::string& name)
    {
      for( auto & c : name)
	if (!isalnum (c))
	  c = '_';
    }
    
    void
    AddAsmText (ASTContext & Context, StringRef text)
    {
      auto* TU = Context.getTranslationUnitDecl ();

      // SG: this is an ultra trick :-)
      // First I'm creating a new FileScopeAsmDecl
      // and then I'm calling the whole **global** ASTconsumer on it.
      // This ends up calling all the consumers, including the backend one
      // and so the decl gets added in the right place.
      Decl* NewDecl = FileScopeAsmDecl::Create
	(Context,
	 TU,
	 clang::StringLiteral::Create (Context, text, clang::StringLiteral::Ascii,
				       /*Pascal*/ false,
				       Context.getConstantArrayType (Context.CharTy,
								     llvm::APInt (32, text.size () + 1),
#if CLANG_VERSION_MAJOR > 8
								     nullptr,
#endif								     
								     clang::ArrayType::Normal,
								     /*IndexTypeQuals*/ 0),
				       SourceLocation ()),
	 {},
	 {});

      CI.getASTConsumer ().HandleTopLevelDecl (DeclGroupRef (NewDecl));
    }
    
    static void
    add_line_to_note (std::ostringstream & buffer, const char * text, const char * comment = nullptr)
    {
      buffer << '\t' << text;
      if (comment)
        buffer << " \t/* " << comment << " */";
      buffer << '\n';
    }

    void
    OutputNote (ASTContext &  Context,
		const char *  name,
		unsigned      namesz,
		bool          name_is_string,
		const char *  name_description,
		unsigned int  type,
		const char *  start_symbol,
		const char *  end_symbol,
		const char *  section_name)
    {
      std::ostringstream text_buffer;
      static char buf[1280];  // FIXME: We should be using a dynamically allocated buffer.
      static const int align = 4;  // FIXME: 8-byte align for 64-bit notes ?

      sprintf (buf, ".pushsection %s, \"\", %%note", section_name);
      add_line_to_note (text_buffer, buf);
      sprintf (buf, ".balign %d", align);
      add_line_to_note (text_buffer, buf);

      if (name == nullptr)
	{
	  if (namesz)
	    ice ("null name with non-zero size");

	  add_line_to_note (text_buffer, ".dc.l 0", "no name");
	}
      else if (name_is_string)
	{
	  char buf2[128];  // FIXME: This should be dynamic and extendable.

	  if (strlen ((char *) name) != namesz - 1)
	    ice ("name string does not match name size");

	  sprintf (buf, ".dc.l %u", namesz);
	  sprintf (buf2, "size of name [= strlen (%s)]\n", name);
	  add_line_to_note (text_buffer, buf, buf2);
	}
      else
	{
	  sprintf (buf, ".dc.l %u", namesz);
	  add_line_to_note (text_buffer, buf, "size of name");
	}

      if (start_symbol != NULL)
	{
	  if (end_symbol == NULL)
	    ice ("start symbol without an end symbol");

	  if (is_32bit)
	    add_line_to_note (text_buffer, ".dc.l 8", "description size [= 2 * sizeof (address)]");
	  else
	    add_line_to_note (text_buffer, ".dc.l 16", "description size [= 2 * sizeof (address)]");
	}
      else
	{
	  if (end_symbol != NULL)
	    ice ("end symbol without a start symbol");
	  add_line_to_note (text_buffer, ".dc.l 0", "no description");
	}

      sprintf (buf, ".dc.l %d", type);
      add_line_to_note (text_buffer, buf, "note type [256 = GLOBAL, 257 = FUNCTION]");

      if (name)
	{
	  if (name_is_string)
	    {
	      add_line_to_note (text_buffer, name, name_description);
	    }
	  else
	    {
	      sprintf (buf, ".dc.b");

	      for (unsigned i = 0; i < namesz; i++)
		sprintf (buf + strlen (buf), " %#x%c", ((unsigned char *) name)[i],
			 i < (namesz - 1) ? ',' : ' ');

	      add_line_to_note (text_buffer, buf, name_description);
	    }

	  if (namesz % align)
	    {
	      sprintf (buf, ".dc.b");
	      while (namesz % align)
		{
		  namesz++;
		  if (namesz % align)
		    strcat (buf, " 0,");
		  else
		    strcat (buf, " 0");
		}
	      add_line_to_note (text_buffer, buf, "padding");
	    }
	}

      if (start_symbol)
	{
	  sprintf (buf, "%s %s", is_32bit ? ".dc.l" : ".quad", (char *) start_symbol);
	  if (target_start_sym_bias)
	    {
	      /* We know that the annobin_current_filename symbol has been
		 biased in order to avoid conflicting with the function
		 name symbol for the first function in the file.  So reverse
		 that bias here.  */
	      if (start_symbol == annobin_current_file_start)
		sprintf (buf + strlen (buf), "- %d", target_start_sym_bias);
	    }

	  add_line_to_note (text_buffer, buf, "start symbol");

	  sprintf (buf, "%s %s", is_32bit ? ".dc.l" : ".quad", (char *) end_symbol);
	  add_line_to_note (text_buffer, buf, "end symbol");
	}

      add_line_to_note (text_buffer, "\t.popsection\n\n");

      AddAsmText (Context, text_buffer.str());
    }

    void
    OutputStringNote (ASTContext &  Context,
		      const char    string_type,
		      const char *  string,
		      const char *  name_description)
    {
      unsigned int len = strlen (string);
      char * buffer;

      buffer = (char *) malloc (len + 5);

      sprintf (buffer, "GA%c%c%s", STRING, string_type, string);

      verbose ("Record %s as '%s'", name_description, string);
      /* Be kind to readers of the assembler source, and do
	 not put control characters into ascii strings.  */
      OutputNote (Context,
		  buffer, len + 5, isprint (string_type), name_description,
		  OPEN, annobin_current_file_start, annobin_current_file_end,
		  GNU_BUILD_ATTRS_SECTION_NAME);

      free (buffer);
    }

    void
    OutputNumericNote (ASTContext &  Context,
		       const char *  numeric_name,
		       unsigned int  val,
		       const char *  name_description)
    {
      char buffer [128];  // FIXME: This should be dynamic and extendable.
      unsigned len = sprintf (buffer, "GA%c%s", NUMERIC, numeric_name);
      char last_byte = 0;

      // For non-alphabetic names, we do not need, or want, the terminating
      // NUL at the end of the string.
      if (! isprint (numeric_name[0]))
	--len;

      verbose ("Record %s value of %u", name_description, val);
	
      do
	{
	  last_byte = buffer[++len] = val & 0xff;
	  val >>= 8;
	}
      while (val);

      if (last_byte != 0)
	buffer[++len] = 0;

      OutputNote (Context, buffer, len + 1, false, name_description,
		  OPEN, annobin_current_file_start, annobin_current_file_end,
		  GNU_BUILD_ATTRS_SECTION_NAME);
    }

    void
    CheckOptions (CompilerInstance & CI, ASTContext & Context)
    {
      const CodeGenOptions & CodeOpts = CI.getCodeGenOpts ();

      unsigned int val = 0;
      val += CodeOpts.CFProtectionBranch ? 1 : 0;
      val += CodeOpts.CFProtectionReturn ? 2 : 0;
      // We bias the value by 1 so that we do not get confused by a zero value.
      val += 1;
      OutputNumericNote (Context, "cf_protection", val, "Control Flow protection");
      
      // The -cfguard option is Windows only - so we ignore it.

      val = CodeOpts.OptimizationLevel;
      if (val > 3)
	val = 3;
      // The optimization level occupies bits 9..11 of the GOW value.
      val <<= 9;
      // FIXME: The value of Context.getDiagnostics().getEnableAllWarnings() does
      // not appear to be valid in clang v9 onwards. :-(
      if (Context.getDiagnostics().getEnableAllWarnings())
	val |= (1 << 14);
      if (CodeOpts.PrepareForLTO || CodeOpts.PrepareForThinLTO)
	val |= (1 << 16);
      else
	val |= (1 << 17);

      verbose ("Optimization = %d, Wall = %d, LTO = %s",
	       CodeOpts.OptimizationLevel,
	       Context.getDiagnostics().getEnableAllWarnings(),
	       CodeOpts.PrepareForLTO || CodeOpts.PrepareForThinLTO ? "on" : "off"
	       );
      OutputNumericNote (Context, "GOW", val, "Optimization Level and Wall");

#if CLANG_VERSION_MAJOR > 7
      val = CodeOpts.SpeculativeLoadHardening ? 2 : 1;
      OutputNumericNote (Context, "SpecLoadHarden", val, "Speculative Load Hardening");
#endif
      
      const LangOptions & lang_opts = CI.getLangOpts ();

      switch (lang_opts.getStackProtector())
	{
	case clang::LangOptions::SSPStrong: val = 2; break;
	case clang::LangOptions::SSPOff: val = 0; break;
	case clang::LangOptions::SSPOn: val = 1; break;
	default: val = 0; break;
	}
	  
      char stack_prot[2] = {GNU_BUILD_ATTRIBUTE_STACK_PROT, 0};
      OutputNumericNote (Context, stack_prot, val, "Stack Protection");

      val = lang_opts.Sanitize.has (clang::SanitizerKind::SafeStack);
      OutputNumericNote (Context, "sanitize_safe_stack", val, "Sanitize Safe Stack");

      val = lang_opts.Sanitize.has (clang::SanitizerKind::CFICastStrict) ? 1 : 0;
      val += lang_opts.Sanitize.has (clang::SanitizerKind::CFIDerivedCast) ? 2 : 0;
      val += lang_opts.Sanitize.has (clang::SanitizerKind::CFIICall) ? 4 : 0;
      val += lang_opts.Sanitize.has (clang::SanitizerKind::CFIMFCall) ? 8 : 0;
      val += lang_opts.Sanitize.has (clang::SanitizerKind::CFIUnrelatedCast) ? 16 : 0;
      val += lang_opts.Sanitize.has (clang::SanitizerKind::CFINVCall) ? 32 : 0;
      val += lang_opts.Sanitize.has (clang::SanitizerKind::CFIVCall) ? 64 : 0;
      OutputNumericNote (Context, "sanitize_cfi", val, "Sanitize Control Flow Integrity");

      if (lang_opts.PIE)
	val = 4;
      else if (lang_opts.PICLevel > 0)
	val = 2;
      else
	val = 0;
      char pic[2] = {GNU_BUILD_ATTRIBUTE_PIC, 0};
      OutputNumericNote (Context, pic, val, "PIE");
            
#if 0 // Placeholder code for when we need to record preprocessor options
      const PreprocessorOptions & pre_opts = CI.getPreprocessorOpts ();
      if (pre_opts.Macros.empty ())
	{
	  verbose ("No preprocessor macros");
	}
      else
	{
	  for (std::vector<std::pair<std::string, bool/*isUndef*/> >::const_iterator
		 i = pre_opts.Macros.begin (),
		 iEnd = pre_opts.Macros.end ();
	       i != iEnd; ++i)
	    {
	      if (! i->second)
		verbose ("Define: %s", i->first.c_str());
	    }
	}
#endif

#if 0 // Placeholder code for when we need to record target specific options.
      const clang::TargetOptions & targ_opts = CI.getTargetOpts ();
      if (targ_opts.FeaturesAsWritten.empty ())
	{
	  verbose ("No target options");
	}
      else
	{
	  for (unsigned i = targ_opts.FeaturesAsWritten.size(); i -- > 0;)
	    verbose ("Target feature: %s", targ_opts.FeaturesAsWritten[i].c_str());
	}
#endif
    }    
  };

  class AnnobinDummyConsumer : public SemaConsumer
  {
    CompilerInstance & Instance;

  public:
    AnnobinDummyConsumer (CompilerInstance & Instance) : Instance (Instance)
    {}

    void
    HandleTranslationUnit (ASTContext &) override
    {
    }
  };
  
  class AnnobinAction : public PluginASTAction
  {
  private:
    bool enabled = true;

  protected:
    std::unique_ptr<ASTConsumer>
    CreateASTConsumer (CompilerInstance& CI, llvm::StringRef) override
    {
      if (enabled)
	return std::make_unique<AnnobinConsumer>(CI);
      else
	return std::make_unique<AnnobinDummyConsumer>(CI);
    }

    // Automatically run the plugin
    PluginASTAction::ActionType 
    getActionType (void) override
    {
      return AddBeforeMainAction;
    }

    // We do not want the plugin to stop the compilation of the binary.
    bool
    usesPreprocessorOnly (void) const override
    {
      return false;
    }

    // Handle any options passed to the plugin.
    bool
    ParseArgs (const CompilerInstance & , const std::vector<std::string>& args) override
    {
      for (unsigned i = 0, e = args.size(); i < e; ++i)
	{
	  if (args[i] == "help")
	    inform ("supported options:\n\
  help      Display this message\n\
  disable   Disable the plugin\n\
  enable    Reenable the plugin if it has been disabled\n\
  version   Displays the version number\n\
  verbose   Produce descriptive messages whilst working");
	  else if (args[i] == "disable")
	    enabled = false;
	  else if (args[i] == "enable")
	    enabled = true;
	  else if (args[i] == "version")
	    inform ("Annobin plugin version: %u", annobin_version);
	  else if (args[i] == "verbose")
	    be_verbose = true;
	  else
	    inform ("error: unknown option: %s", args[i].c_str());
	}

      return true;
    }
  };
}

static FrontendPluginRegistry::Add<AnnobinAction>
X("annobin", "annotate binary output");