/* * Copyright (C) 2015 The Lemon Man * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2, or (at your option) * any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ namespace Gedit { namespace FindInFilesPlugin { interface IMatcher : Object { public abstract bool has_match (uint8 *text, size_t text_length, size_t pos, ref Range match); } struct Range { size_t from; size_t to; } struct Bookmark { int line_number; size_t line_start; size_t line_length; public static const Bookmark EmptyBookmark = { 0, 0 }; } struct Result { string path; size_t line; string context; } class FindJob { public signal void on_match_found (Result result); public signal void on_search_finished (); // This queue holds all the file names to scan private AsyncQueue scan_queue = new AsyncQueue(); // The list of (hard) workers private List> thread_workers; // Count how many workers are still working private uint running_workers = 0; private IMatcher? matcher = null; private Cancellable cancellable; // The actual job parameters public bool include_hidden; public bool match_whole_word; public bool ignore_case; public string? needle { get; private set; } int worker () { while (true) { // Wait 0.5 seconds var tv = TimeVal (); tv.add (1000000 / 2); var path = scan_queue.timed_pop (ref tv); // Check for interruption if (cancellable.is_cancelled ()) break; // If path is null then we're probably done if (path == null) break; // Scan the file scan_file (path); } // We're done, check if we're the last worker active and signal it to the user lock (running_workers) { if (0 == (--running_workers)) { // Run the completion callback in the main thread Idle.add (() => { on_search_finished (); return false; }); } } return 0; } public FindJob (Cancellable? cancellable) { this.cancellable = cancellable ?? new Cancellable (); needle = null; include_hidden = false; match_whole_word = false; ignore_case = false; } public void prepare (string needle, bool is_regex) throws Error { // Construct the matcher instance if (is_regex) { matcher = new RegexFind (needle, ignore_case); } else { matcher = new BoyerMooreHorspool (needle, ignore_case); } } void start_workers_pool () { if (running_workers != 0) { return; } var online_cpus = get_num_processors (); for (var i = 0; i < online_cpus; i++) { thread_workers.append (new Thread ("Worker %d".printf (i), worker)); } running_workers = online_cpus; } public string extract_context (uint8 *buffer, Range range) { uint8 *slice = new uint8[range.to - range.from]; Posix.memcpy (slice, buffer + range.from, range.to - range.from); return Gedit.utils_make_valid_utf8 ((string)slice); } public void halt () { if (running_workers == 0) return; cancellable.cancel (); foreach (var worker in thread_workers) { worker.join (); } } bool is_binary (uint8 *buffer, size_t buffer_size) { // Poor-man binary detection, check for NULL bytes return Posix.memchr (buffer, '\0', buffer_size) != null; } bool ignore_vcs_dir (FileInfo file) { if (file.get_file_type () == FileType.DIRECTORY) { var name = file.get_name (); if (name == ".git" || name == ".bzr" || name == ".svn" || name == ".hg" || name == "_darcs") return true; } return false; } public async void execute (string root) throws Error { var queue = new Queue (); var visited = new HashTable (str_hash, str_equal); start_workers_pool (); queue.push_tail (File.new_for_path (root)); while (!queue.is_empty ()) { if (cancellable.is_cancelled ()) break; var dir = queue.pop_head (); var e = yield dir.enumerate_children_async (FileAttribute.STANDARD_NAME, FileQueryInfoFlags.NOFOLLOW_SYMLINKS, Priority.DEFAULT); // Process the entries we got so far while (true) { var files = yield e.next_files_async (100); if (files == null) break; if (cancellable.is_cancelled ()) break; foreach (var file in files) { if (!include_hidden && file.get_name ()[0] == '.') continue; if (ignore_vcs_dir (file)) continue; var _file = dir.resolve_relative_path (file.get_name ()); switch (file.get_file_type ()) { case FileType.REGULAR: scan_queue.push (_file.get_path ()); break; case FileType.DIRECTORY: queue.push_tail (_file); visited.insert (_file.get_path (), true); break; case FileType.SYMBOLIC_LINK: var link_dest = Posix.realpath (_file.get_path ()); // Follow the link only if its not broken and we didn't visit it if (link_dest != null && !visited.contains (link_dest)) { if (FileUtils.test (link_dest, FileTest.IS_DIR) && (!ignore_case && link_dest[0] != '.')) queue.push_tail (File.new_for_path (link_dest)); } break; } } } } } // Returns the line number where the text span starting at 'from' and ending at 'to' lies. void get_line (uint8 *buffer, size_t buffer_size, ref Range span, ref Bookmark bookmark) { // We take an advantage by knowing that all the calls to get_line are sequential, hence // we save the position of the last matched line and start from there var line_count = bookmark.line_number; var line_start = bookmark.line_start; var ptr = buffer + bookmark.line_start; while (ptr < buffer + buffer_size) { // Find the newline uint8 *nl = Posix.memchr (ptr, '\n', buffer_size - (ptr - buffer)); if (nl == null) { // No more newlines, consider the trailing NULL as the final newline nl = buffer + buffer_size + 1; } else { // Skip the '\n' nl++; } var line_length = nl - ptr; // Check if the match is within this line if (span.from >= line_start && span.to < line_start + line_length) { // Update the bookmark bookmark.line_number = line_count; bookmark.line_start = line_start; bookmark.line_length = line_length - 1; return; } line_count++; line_start += line_length; ptr = nl; } // There's no way to exit the loop, unless there's a problem in the logic above assert_not_reached (); } bool is_word_boundary (uint8 *buf, size_t buf_size, size_t from, size_t to) { unichar ch; bool prev, next; unowned string str; assert (to > from && to <= buf_size); // There's not much we can do if (to - from > int.MAX) return false; // Silence the warning about ch being uninitialized ch = '\0'; prev = next = true; str = (string)(buf + from); // Those are being modified by the get_{prev,next}_char calls int start = 0; int end = (int)(to - from); // The match is on a word boundary if there are no consecutive alphanumeric // characters right before or after the match var head = str.get_char (0); if (start > 0 && str.get_prev_char (ref start, out ch)) prev = head.isalnum () != ch.isalnum (); var tail = str.get_char (end - 1); if (end < buf_size && str.get_next_char (ref end, out ch)) next = tail.isalnum () != ch.isalnum (); return prev && next; } void scan_file (string path) { MappedFile file; try { file = new MappedFile (path, false); } catch (FileError err) { warning (err.message); return; } var buffer_size = file.get_length (); var buffer = (uint8 *)file.get_contents (); // Skip binary files for obvious reasons if (is_binary (buffer, buffer_size)) return; Range match = { 0, 0 }; Bookmark bookmark = Bookmark.EmptyBookmark; var last_line = -1; for (size_t buffer_pos = 0; buffer_pos < buffer_size; ) { if (cancellable.is_cancelled ()) break; // Exit when there's no match if (!matcher.has_match (buffer, buffer_size, buffer_pos, ref match)) break; // Check if the match lies on a word boundary if (match_whole_word) { if (!is_word_boundary (buffer, buffer_size, (int)match.from, (int)match.to)) { buffer_pos = match.to; continue; } } get_line (buffer, buffer_size, ref match, ref bookmark); // Find out what line the match lies in var match_line = 1 + bookmark.line_number; // Notify that we got a match if (last_line != match_line) { Result res = { path, match_line, extract_context (buffer, match) }; on_match_found (res); } last_line = match_line; // Keep searching past the match buffer_pos = match.to; } } } } // namespace FindInFilesPlugin } // namespace Gedit