#include "llvm/Pass.h"
#include "llvm/IR/Module.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/IR/LegacyPassManager.h"
#include "llvm/Transforms/IPO/PassManagerBuilder.h"
#include "llvm/LTO/legacy/LTOCodeGenerator.h"
#include "annobin-global.h"
// Standard headers required by the helpers and the pass below.
#include <cctype>
#include <cstdarg>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <initializer_list>
#include <sstream>
#include <string>

using namespace llvm;

namespace
{
  // When true, verbose() messages are printed.  Enabled by the
  // ANNOBIN_VERBOSE environment variable (see the pass constructor).
  static bool be_verbose = false;

  // When true, the start symbol written into a note has the bias removed
  // again (see OutputNote).  Nothing in this file ever sets it.
  static bool target_start_sym_bias = false;

  // Helper functions used throughout this file.

  // Streams every argument into one string and returns a strdup()'ed copy.
  // The caller owns the result and must release it with free().
  template <class... Tys>
  char *
  concat (Tys const&... args)
  {
    std::ostringstream oss;
    (void) std::initializer_list<int>{((oss << args), 1)...};
    return strdup (oss.str().c_str());
  }

  // Unconditionally print a printf-style message, prefixed with "Annobin: ",
  // to stderr.  stdout is flushed first so the two streams stay ordered.
  static inline void
  inform (char const fmt[], ...)
  {
    va_list args;

    va_start (args, fmt);
    fflush (stdout);
    fprintf (stderr, "Annobin: ");
    vfprintf (stderr, fmt, args);
    fputc ('\n', stderr);
    va_end (args);
  }

  // As inform(), but silent unless be_verbose is set.
  static inline void
  verbose (char const fmt[], ...)
  {
    if (! be_verbose)
      return;

    va_list args;

    va_start (args, fmt);
    fflush (stdout);
    fprintf (stderr, "Annobin: ");
    vfprintf (stderr, fmt, args);
    fputc ('\n', stderr);
    va_end (args);
  }

  // Report an internal error and terminate the process with EXIT_FAILURE.
  [[noreturn]] static inline void
  ice (char const fmt[], ...)
  {
    va_list args;

    va_start (args, fmt);
    fflush (stdout);
    fprintf (stderr, "Annobin: Internal Error: ");
    vfprintf (stderr, fmt, args);
    fputc ('\n', stderr);
    va_end (args);
    exit (EXIT_FAILURE);
  }

  // Module pass that appends build-attribute notes to the module as inline
  // assembler.
  class AnnobinModulePass : public ModulePass
  {
  private:
    const unsigned int version = ANNOBIN_VERSION;

    // Backing storage for the start/end symbol names.  fileStart and fileEnd
    // point into these strings, so nothing has to be freed by hand.
    std::string        fileStartName;
    std::string        fileEndName;
    char const *       fileStart = nullptr;
    char const *       fileEnd = nullptr;

    // Set via setOptLevel(); defaults to 0 so it is never read uninitialised.
    unsigned int       optLevel = 0;

    // Emit one note into SECTION_NAME as module-level inline assembler.
    //
    //   name / namesz    - bytes of the note's name field; NAME may be null
    //                      only if NAMESZ is zero.
    //   name_is_string   - NAME is a NUL terminated printable string of
    //                      exactly NAMESZ - 1 characters; emitted with .asciz.
    //                      Otherwise the bytes are emitted with .dc.b.
    //   name_description - text for the assembler comment on the name line.
    //   type             - note type value.
    //   start_symbol /
    //   end_symbol       - both null (no description field) or both non-null
    //                      (a 16 byte description holding the two addresses).
    //
    // Any inconsistency between the arguments is reported through ice().
    void
    OutputNote (Module &      module,
                const char *  name,
                unsigned      namesz,
                bool          name_is_string,
                const char *  name_description,
                unsigned int  type,
                const char *  start_symbol,
                const char *  end_symbol,
                const char *  section_name)
    {
      static const unsigned align = 4;
      std::ostringstream text_buffer;

      add_line_to_note (text_buffer,
                        std::string (".pushsection ") + section_name + ", \"\", %note");
      add_line_to_note (text_buffer, ".balign " + std::to_string (align));

      // Size of the name field.
      if (name == nullptr)
        {
          if (namesz)
            ice ("null name with non-zero size");
          add_line_to_note (text_buffer, ".dc.l 0", "no name");
        }
      else if (name_is_string)
        {
          if (strlen (name) != namesz - 1)
            ice ("name string does not match name size");

          const std::string size_comment
            = std::string ("size of name [= strlen (") + name + ")]\n";
          add_line_to_note (text_buffer, ".dc.l " + std::to_string (namesz),
                            size_comment.c_str ());
        }
      else
        {
          add_line_to_note (text_buffer, ".dc.l " + std::to_string (namesz),
                            "size of name");
        }

      // Size of the description field.
      if (start_symbol != nullptr)
        {
          if (end_symbol == nullptr)
            ice ("start symbol without an end symbol");
          add_line_to_note (text_buffer, ".dc.l 16",
                            "description size [= 2 * sizeof (address)]");
        }
      else
        {
          if (end_symbol != nullptr)
            ice ("end symbol without a start symbol");
          add_line_to_note (text_buffer, ".dc.l 0", "no description");
        }

      add_line_to_note (text_buffer,
                        ".dc.l " + std::to_string (static_cast<int> (type)),
                        "note type [256 = GLOBAL, 257 = FUNCTION]");

      if (name)
        {
          if (name_is_string)
            {
              // The name has to be emitted as data.  Writing the bare text
              // would make the assembler parse it as a statement.  .asciz
              // also supplies the terminating NUL counted in NAMESZ.
              add_line_to_note (text_buffer,
                                ".asciz \"" + escape_for_asm_string (name) + "\"",
                                name_description);
            }
          else
            {
              std::string bytes = ".dc.b";

              for (unsigned i = 0; i < namesz; i++)
                {
                  char one_byte[16];

                  snprintf (one_byte, sizeof one_byte, " %#x%c",
                            static_cast<unsigned char> (name[i]),
                            i < (namesz - 1) ? ',' : ' ');
                  bytes += one_byte;
                }

              add_line_to_note (text_buffer, bytes, name_description);
            }

          // Pad the name field up to the note alignment.
          if (namesz % align)
            {
              std::string padding = ".dc.b";

              while (namesz % align)
                {
                  namesz++;
                  padding += (namesz % align) ? " 0," : " 0";
                }

              add_line_to_note (text_buffer, padding, "padding");
            }
        }

      if (start_symbol)
        {
          std::string start_line = std::string (".quad ") + start_symbol;

          /* We know that the annobin_current_filename symbol has been
             biased in order to avoid conflicting with the function
             name symbol for the first function in the file.  So reverse
             that bias here.  */
          if (target_start_sym_bias && start_symbol == fileStart)
            start_line += "- " + std::to_string (static_cast<int> (target_start_sym_bias));

          add_line_to_note (text_buffer, start_line, "start symbol");
          add_line_to_note (text_buffer, std::string (".quad ") + end_symbol,
                            "end symbol");
        }

      add_line_to_note (text_buffer, "\t.popsection\n\n");

      module.appendModuleInlineAsm (text_buffer.str ());
    }

    // Emit a numeric note.  The note name is "GA", the NUMERIC type
    // character, NUMERIC_NAME, then VAL encoded least significant byte
    // first, always ending in a zero byte.
    void
    OutputNumericNote (Module &      module,
                       const char *  numeric_name,
                       unsigned int  val,
                       const char *  name_description)
    {
      std::string note = "GA";

      note += static_cast<char> (NUMERIC);
      note += numeric_name;

      // For non-alphabetic names, we do not need, or want,
      // the terminating NUL at the end of the string.
      if (isprint (static_cast<unsigned char> (numeric_name[0])))
        note += '\0';

      verbose ("Record %s note as numeric value of %u", name_description, val);

      char last_byte = 0;
      do
        {
          last_byte = static_cast<char> (val & 0xff);
          note += last_byte;
          val >>= 8;
        }
      while (val);

      // Make sure that the encoded value ends in a zero byte.
      if (last_byte != 0)
        note += '\0';

      OutputNote (module, note.data (), static_cast<unsigned> (note.size ()),
                  false, name_description, OPEN, fileStart, fileEnd,
                  GNU_BUILD_ATTRS_SECTION_NAME);
    }

    // Emit a string note.  The note name is "GA", the STRING type
    // character, STRING_TYPE, then STRING with its terminating NUL.
    void
    OutputStringNote (Module &      module,
                      const char    string_type,
                      const char *  string,
                      const char *  name_description)
    {
      std::string note = "GA";

      note += static_cast<char> (STRING);
      note += string_type;
      note += string;

      verbose ("Record %s as '%s'", name_description, string);

      /* Be kind to readers of the assembler source, and do
         not put control characters into ascii strings.  */
      const bool printable = isprint (static_cast<unsigned char> (string_type)) != 0;

      // The + 1 accounts for the terminating NUL, which c_str() provides.
      OutputNote (module, note.c_str (), static_cast<unsigned> (note.size () + 1),
                  printable, name_description, OPEN, fileStart, fileEnd,
                  GNU_BUILD_ATTRS_SECTION_NAME);
    }

  public:
    static char ID;

    // Verbose output is enabled when ANNOBIN_VERBOSE is set in the
    // environment to anything other than "false".
    AnnobinModulePass() : ModulePass (ID)
    {
      const char * const setting = getenv ("ANNOBIN_VERBOSE");

      if (setting != nullptr && ! streq (setting, "false"))
        be_verbose = true;
    }

    // Record the optimization level that is reported in the GOW note.
    void
    setOptLevel (unsigned int val)
    {
      optLevel = val;
    }

    StringRef
    getPassName (void) const override
    {
      return "Annobin Module Pass";
    }

    // Append the start/end symbols and all of the notes to MODULE.
    // Always returns true because the module's inline assembler is modified.
    bool
    runOnModule (Module & module) override
    {
      std::string filename = module.getSourceFileName ();

      // Generate start and end symbols.
      convert_to_valid_symbol_name (filename);
      verbose ("Generate start and end symbols based on: %s", filename.c_str ());
      fileStartName = "_annobin_" + filename + "_start";
      fileEndName   = "_annobin_" + filename + "_end";
      fileStart = fileStartName.c_str ();
      fileEnd   = fileEndName.c_str ();

      // The text is built dynamically because the symbol names are derived
      // from the source file name and so have no fixed upper length.  The
      // last .popsection balances the first .pushsection, so the assembler's
      // section stack is left as it was found.
      std::ostringstream symbols;
      symbols << "\t.pushsection .text\n"
              << "\t.hidden " << fileStart << "\n"
              << "\t.type " << fileStart << ", STT_NOTYPE\n"
              << "\t.equiv " << fileStart << ", .text\n"
              << "\t.size " << fileStart << ", 0\n"
              << "\t.pushsection .text.zzz\n"
              << "\t.hidden " << fileEnd << "\n"
              << "\t.type " << fileEnd << ", STT_NOTYPE\n"
              << "\t.equiv " << fileEnd << ", .text.zzz\n"
              << "\t.size " << fileEnd << ", 0\n"
              << "\t.popsection\n"
              << "\t.popsection\n";
      module.appendModuleInlineAsm (symbols.str ());

      // Generate version notes.
      char version_text[64];
      snprintf (version_text, sizeof version_text, "%d%c%u",
                SPEC_VERSION, ANNOBIN_TOOL_ID_LLVM, version);
      OutputStringNote (module, GNU_BUILD_ATTRIBUTE_VERSION, version_text,
                        "version note");

      const std::string built_by
        = std::string ("annobin built by llvm version ") + LLVM_VERSION_STRING;
      OutputStringNote (module, GNU_BUILD_ATTRIBUTE_TOOL, built_by.c_str (),
                        "tool note (plugin built by)");

      const std::string running_on
        = std::string ("running on ") + LTOCodeGenerator::getVersionString ();
      OutputStringNote (module, GNU_BUILD_ATTRIBUTE_TOOL, running_on.c_str (),
                        "tool note (running on)");

      // Generate a PIE note.
      unsigned int val;
      if (module.getPIELevel () > 0)
        val = 4;
      else if (module.getPICLevel () > 0)
        val = 2;
      else
        val = 0;

      char pic[2] = {GNU_BUILD_ATTRIBUTE_PIC, 0};
      OutputNumericNote (module, pic, val, "PIE");

      // Generate FORTIFY, SAFE STACK and STACK PROT STRONG notes.
      //
      // Unfortunately, since we are looking at the IR we have no access
      // to any preprocessor defines.  Instead we look for references to
      // functions that end in *_chk.  This is not a perfect heuristic by
      // any means, but it is the best that I can think of for now.
      bool stack_prot_strong_found = false;
      bool safe_stack_found = false;
      bool fortify_found = false;

      // Iterating over a Module yields its functions directly, so there is
      // no need to look each one up again by name.
      for (Function & func : module)
        {
          if (! stack_prot_strong_found
              && func.hasFnAttribute (Attribute::StackProtectStrong))
            {
              char prot[2] = {GNU_BUILD_ATTRIBUTE_STACK_PROT, 0};
              OutputNumericNote (module, prot, 3, "Stack Proctector Strong");
              stack_prot_strong_found = true;
            }

          if (! safe_stack_found && func.hasFnAttribute (Attribute::SafeStack))
            {
              // FIXME: Using the stack_clash note is not quite correct, but will do for now.
              OutputNumericNote (module, "stack_clash", 1, "SafeStack attribute");
              safe_stack_found = true;
            }

          if (! fortify_found && func.getName ().take_back (4) == "_chk")
            {
              OutputNumericNote (module, "FORTIFY", 2, "_FORTITFY_SOURCE used (probably)");
              fortify_found = true;
            }

          if (safe_stack_found && fortify_found && stack_prot_strong_found)
            break;
        }

      if (! stack_prot_strong_found)
        OutputNumericNote (module, "StackProtStrong", 0, "Stack Proctector Strong");

      if (! safe_stack_found)
        OutputNumericNote (module, "SafeStack", 0, "SafeStack attribute");

      // Do not worry about missing FORTIFY functions.

      // Generate a GOW note.
      val = optLevel;
      if (val > 3)
        val = 3;
      // The optimization level occupies bits 9..11 of the GOW value.
      val <<= 9;
      // FIXME: For now we lie and say that -Wall was used.
      val |= 1 << 14;
      verbose ("optimization level is %u", optLevel);
      OutputNumericNote (module, "GOW", val, "Optimization Level");

      // Generate a cf-protection note.
      val = 0;
      if (module.getModuleFlag ("cf-protection-branch"))
        val += 1;
      if (module.getModuleFlag ("cf-protection-return"))
        val += 2;
      // We bias the value by 1 so that we do not get confused by a zero value.
      val += 1;
      OutputNumericNote (module, "cf_protection", val, "Control Flow protection");

#if 0
      if (be_verbose)
        {
          verbose ("Available module flags:");
          SmallVector<Module::ModuleFlagEntry, 8> ModuleFlags;
          module.getModuleFlagsMetadata (ModuleFlags);
          for (const llvm::Module::ModuleFlagEntry & MFE : ModuleFlags)
            inform ("  %s", MFE.Key->getString ().str ().c_str ());
        }
#endif

      return true; // Module has been modified.
    }

  private:
    // Replace every character of NAME that is not alphanumeric with '_'.
    static void
    convert_to_valid_symbol_name (std::string & name)
    {
      for (auto & c : name)
        if (! isalnum (static_cast<unsigned char> (c)))
          c = '_';
    }

    // Return TEXT with backslashes and double quotes escaped so that it can
    // be placed between double quotes in an assembler string directive.
    static std::string
    escape_for_asm_string (const char * text)
    {
      std::string escaped;

      for (const char * p = text; *p != '\0'; ++p)
        {
          if (*p == '\\' || *p == '"')
            escaped += '\\';
          escaped += *p;
        }

      return escaped;
    }

    // Append "\t<text>", an optional "/* comment */" and a newline to BUFFER.
    static void
    add_line_to_note (std::ostringstream &  buffer,
                      const std::string &   text,
                      const char *          comment = nullptr)
    {
      buffer << '\t' << text;
      if (comment)
        buffer << " \t/* " << comment << " */";
      buffer << '\n';
    }
  }; // End of class AnnobinModulePass

  // Create a module pass that will report OPTLEVEL in its GOW note.
  Pass *
  createAnnobinModulePass (int optLevel)
  {
    verbose ("Creating Module Pass");

    AnnobinModulePass * p = new AnnobinModulePass;
    // FIXME: There must surely be a way to access this information from with the Module class.
    p->setOptLevel (optLevel);
    return p;
  }
}

char AnnobinModulePass::ID = 0;

// Extension-point callback: adds a new module pass to PM, configured with
// the pass manager builder's optimization level.
static void
registerAnnobinModulePass (const PassManagerBuilder & PMB,
                           legacy::PassManagerBase & PM)
{
  static RegisterPass<AnnobinModulePass> X ("annobin", "Annobin Module Pass");
  PM.add (createAnnobinModulePass ((int) PMB.OptLevel));
}

// NB. The choice of when to run the passes is critical.  Using
// EP_EarlyAsPossible for example will run all the passes as Function passes,
// even if they are Module passes.  Whilst using EP_ModuleOptimizerEarly will
// not run the pass at -O0.  Hence we use three different pass levels.
static RegisterStandardPasses RegisterMyPass2 (PassManagerBuilder::EP_EnabledOnOptLevel0, registerAnnobinModulePass); static RegisterStandardPasses RegisterMyPass3 (PassManagerBuilder::EP_ModuleOptimizerEarly, registerAnnobinModulePass); // ------------------------------------------------------------------------------------- // Function Pass using namespace llvm; namespace { class AnnobinFunctionPass : public FunctionPass { static char ID; public: AnnobinFunctionPass() : FunctionPass (ID) {} virtual bool runOnFunction (Function & F) { // FIXME: Need to figure out how to get to the Module class from here. Module * M = F.getParent(); verbose ("Checking function %s in Module %p", F.getName(), M); return false; } virtual StringRef getPassName (void) const { return "Annobin Function Pass"; } }; } char AnnobinFunctionPass::ID = 0; static void registerAnnobinFunctionPass (const PassManagerBuilder & PMB, legacy::PassManagerBase & PM) { PM.add (new AnnobinFunctionPass ()); } static RegisterStandardPasses RegisterMyPass1 (PassManagerBuilder::EP_EarlyAsPossible, registerAnnobinFunctionPass);