/*
* Copyright (C) 2015 The Lemon Man
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2, or (at your option)
* any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*/
namespace Gedit {
namespace FindInFilesPlugin {
interface IMatcher : Object {
public abstract bool has_match (uint8 *text, size_t text_length, size_t pos, ref Range match);
}
struct Range {
size_t from;
size_t to;
}
struct Bookmark {
int line_number;
size_t line_start;
size_t line_length;
public static const Bookmark EmptyBookmark = { 0, 0 };
}
struct Result {
string path;
size_t line;
string context;
}
class FindJob {
public signal void on_match_found (Result result);
public signal void on_search_finished ();
// This queue holds all the file names to scan
private AsyncQueue<string> scan_queue = new AsyncQueue<string>();
// The list of (hard) workers
private List<Thread<int>> thread_workers;
// Count how many workers are still working
private uint running_workers = 0;
private IMatcher? matcher = null;
private Cancellable cancellable;
// The actual job parameters
public bool include_hidden;
public bool match_whole_word;
public bool ignore_case;
public string? needle { get; private set; }
int worker () {
while (true) {
// Wait 0.5 seconds
var tv = TimeVal ();
tv.add (1000000 / 2);
var path = scan_queue.timed_pop (ref tv);
// Check for interruption
if (cancellable.is_cancelled ())
break;
// If path is null then we're probably done
if (path == null)
break;
// Scan the file
scan_file (path);
}
// We're done, check if we're the last worker active and signal it to the user
lock (running_workers) {
if (0 == (--running_workers)) {
// Run the completion callback in the main thread
Idle.add (() => { on_search_finished (); return false; });
}
}
return 0;
}
public FindJob (Cancellable? cancellable) {
this.cancellable = cancellable ?? new Cancellable ();
needle = null;
include_hidden = false;
match_whole_word = false;
ignore_case = false;
}
public void prepare (string needle, bool is_regex) throws Error {
// Construct the matcher instance
if (is_regex) {
matcher = new RegexFind (needle, ignore_case);
} else {
matcher = new BoyerMooreHorspool (needle, ignore_case);
}
}
void start_workers_pool () {
if (running_workers != 0) {
return;
}
var online_cpus = get_num_processors ();
for (var i = 0; i < online_cpus; i++) {
thread_workers.append (new Thread<int> ("Worker %d".printf (i), worker));
}
running_workers = online_cpus;
}
public string extract_context (uint8 *buffer, Range range) {
uint8 *slice = new uint8[range.to - range.from];
Posix.memcpy (slice, buffer + range.from, range.to - range.from);
return Gedit.utils_make_valid_utf8 ((string)slice);
}
public void halt () {
if (running_workers == 0)
return;
cancellable.cancel ();
foreach (var worker in thread_workers) {
worker.join ();
}
}
bool is_binary (uint8 *buffer, size_t buffer_size) {
// Poor-man binary detection, check for NULL bytes
return Posix.memchr (buffer, '\0', buffer_size) != null;
}
bool ignore_vcs_dir (FileInfo file) {
if (file.get_file_type () == FileType.DIRECTORY) {
var name = file.get_name ();
if (name == ".git" ||
name == ".bzr" ||
name == ".svn" ||
name == ".hg" ||
name == "_darcs")
return true;
}
return false;
}
public async void execute (string root) throws Error {
var queue = new Queue<File> ();
var visited = new HashTable<string, bool> (str_hash, str_equal);
start_workers_pool ();
queue.push_tail (File.new_for_path (root));
while (!queue.is_empty ()) {
if (cancellable.is_cancelled ())
break;
var dir = queue.pop_head ();
var e = yield dir.enumerate_children_async (FileAttribute.STANDARD_NAME,
FileQueryInfoFlags.NOFOLLOW_SYMLINKS,
Priority.DEFAULT);
// Process the entries we got so far
while (true) {
var files = yield e.next_files_async (100);
if (files == null)
break;
if (cancellable.is_cancelled ())
break;
foreach (var file in files) {
if (!include_hidden && file.get_name ()[0] == '.')
continue;
if (ignore_vcs_dir (file))
continue;
var _file = dir.resolve_relative_path (file.get_name ());
switch (file.get_file_type ()) {
case FileType.REGULAR:
scan_queue.push (_file.get_path ());
break;
case FileType.DIRECTORY:
queue.push_tail (_file);
visited.insert (_file.get_path (), true);
break;
case FileType.SYMBOLIC_LINK:
var link_dest = Posix.realpath (_file.get_path ());
// Follow the link only if its not broken and we didn't visit it
if (link_dest != null && !visited.contains (link_dest)) {
if (FileUtils.test (link_dest, FileTest.IS_DIR) &&
(!ignore_case && link_dest[0] != '.'))
queue.push_tail (File.new_for_path (link_dest));
}
break;
}
}
}
}
}
// Returns the line number where the text span starting at 'from' and ending at 'to' lies.
void get_line (uint8 *buffer, size_t buffer_size, ref Range span, ref Bookmark bookmark) {
// We take an advantage by knowing that all the calls to get_line are sequential, hence
// we save the position of the last matched line and start from there
var line_count = bookmark.line_number;
var line_start = bookmark.line_start;
var ptr = buffer + bookmark.line_start;
while (ptr < buffer + buffer_size) {
// Find the newline
uint8 *nl = Posix.memchr (ptr, '\n', buffer_size - (ptr - buffer));
if (nl == null) {
// No more newlines, consider the trailing NULL as the final newline
nl = buffer + buffer_size + 1;
} else {
// Skip the '\n'
nl++;
}
var line_length = nl - ptr;
// Check if the match is within this line
if (span.from >= line_start && span.to < line_start + line_length) {
// Update the bookmark
bookmark.line_number = line_count;
bookmark.line_start = line_start;
bookmark.line_length = line_length - 1;
return;
}
line_count++;
line_start += line_length;
ptr = nl;
}
// There's no way to exit the loop, unless there's a problem in the logic above
assert_not_reached ();
}
bool is_word_boundary (uint8 *buf, size_t buf_size, size_t from, size_t to) {
unichar ch;
bool prev, next;
unowned string str;
assert (to > from && to <= buf_size);
// There's not much we can do
if (to - from > int.MAX)
return false;
// Silence the warning about ch being uninitialized
ch = '\0';
prev = next = true;
str = (string)(buf + from);
// Those are being modified by the get_{prev,next}_char calls
int start = 0;
int end = (int)(to - from);
// The match is on a word boundary if there are no consecutive alphanumeric
// characters right before or after the match
var head = str.get_char (0);
if (start > 0 && str.get_prev_char (ref start, out ch))
prev = head.isalnum () != ch.isalnum ();
var tail = str.get_char (end - 1);
if (end < buf_size && str.get_next_char (ref end, out ch))
next = tail.isalnum () != ch.isalnum ();
return prev && next;
}
void scan_file (string path) {
MappedFile file;
try {
file = new MappedFile (path, false);
}
catch (FileError err) {
warning (err.message);
return;
}
var buffer_size = file.get_length ();
var buffer = (uint8 *)file.get_contents ();
// Skip binary files for obvious reasons
if (is_binary (buffer, buffer_size))
return;
Range match = { 0, 0 };
Bookmark bookmark = Bookmark.EmptyBookmark;
var last_line = -1;
for (size_t buffer_pos = 0; buffer_pos < buffer_size; ) {
if (cancellable.is_cancelled ())
break;
// Exit when there's no match
if (!matcher.has_match (buffer, buffer_size, buffer_pos, ref match))
break;
// Check if the match lies on a word boundary
if (match_whole_word) {
if (!is_word_boundary (buffer, buffer_size, (int)match.from, (int)match.to)) {
buffer_pos = match.to;
continue;
}
}
get_line (buffer, buffer_size, ref match, ref bookmark);
// Find out what line the match lies in
var match_line = 1 + bookmark.line_number;
// Notify that we got a match
if (last_line != match_line) {
Result res = { path, match_line, extract_context (buffer, match) };
on_match_found (res);
}
last_line = match_line;
// Keep searching past the match
buffer_pos = match.to;
}
}
}
} // namespace FindInFilesPlugin
} // namespace Gedit