Fix TAB-completion + .gdb_index slowness (generalize filename_seen_cache)

Tab completion when debugging a program binary that uses GDB index is
surprisingly much slower than when GDB uses psymtabs instead.  Around
1.5x/3x slower.  That's surprising, because the whole point of GDB
index is to speed things up...

For example, with:

 set pagination off
 set $count = 0
 while $count < 400
   complete b string_prin         # matches gdb's string_printf
   printf "count = %d\n", $count
   set $count = $count + 1
 end

 $ time ./gdb --batch -q  ./gdb-with-index -ex "source script.cmd"
 real    0m11.042s
 user    0m10.920s
 sys     0m0.042s

 $ time ./gdb --batch -q  ./gdb-without-index -ex "source script.cmd"
 real    0m4.635s
 user    0m4.590s
 sys     0m0.037s

Same but with:
 -   complete b string_prin
 +   complete b zzzzzz
to exercise the no-matches worst case, master currently gets you
something like:

 with index           without index
 real    0m11.971s    0m8.413s
 user    0m11.912s    0m8.355s
 sys     0m0.035s     0m0.035s

Running gdb under perf shows 80% spent inside
maybe_add_partial_symtab_filename, and 20% spent in the lbasename
inside that.

The problem that tab completion walks over all compunit symtabs, and
for each, walks the contained file symtabs.  And there a huge number
of file symtabs (each included system header, etc.) that appear in
each compunit symtab's file symtab list.  As in, when debugging GDB, I
have 367381 symtabs iterated, when of those only 5371 filenames are
unique...

This was a regression from the earlier (nice) split of symtabs in
compunit symtabs + file symtabs.

The fix here is to add a cache of unique filenames per objfile so that
the walk / uniquing is only done once.  There's already a abstraction
for this in symtab.c; this patch moves that code out to a separate
file and C++ifies it bit.

This makes the worst-case scenario above consistently drop to ~2.5s
(1.5s for the "string_prin" hit case), making it over 3.3x times
faster than psymtabs in this use case (7x in the "string_prin" hit
case).

gdb/ChangeLog:
2017-07-17  Pedro Alves  <palves@redhat.com>

	* Makefile.in (COMMON_OBS): Add filename-seen-cache.o.
	* dwarf2read.c: Include "filename-seen-cache.h".
	* dwarf2read.c (dwarf2_per_objfile) <filenames_cache>: New field.
	(dw2_map_symbol_filenames): Build and use a filenames_seen_cache.
	* filename-seen-cache.c: New file.
	* filename-seen-cache.h: New file.
	* symtab.c: Include "filename-seen-cache.h".
	(struct filename_seen_cache, INITIAL_FILENAME_SEEN_CACHE_SIZE)
	(create_filename_seen_cache, clear_filename_seen_cache)
	(delete_filename_seen_cache, filename_seen): Delete, parts moved
	to filename-seen-cache.h/filename-seen-cache.c.
	(output_source_filename, sources_info)
	(maybe_add_partial_symtab_filename)
	(make_source_files_completion_list): Adjust to use
	filename_seen_cache.
This commit is contained in:
Pedro Alves 2017-07-17 11:28:33 +01:00
parent 330cdd9891
commit bbf2f4dfae
6 changed files with 224 additions and 135 deletions

View File

@ -1,3 +1,21 @@
2017-07-17 Pedro Alves <palves@redhat.com>
* Makefile.in (COMMON_OBS): Add filename-seen-cache.o.
* dwarf2read.c: Include "filename-seen-cache.h".
* dwarf2read.c (dwarf2_per_objfile) <filenames_cache>: New field.
(dw2_map_symbol_filenames): Build and use a filenames_seen_cache.
* filename-seen-cache.c: New file.
* filename-seen-cache.h: New file.
* symtab.c: Include "filename-seen-cache.h".
(struct filename_seen_cache, INITIAL_FILENAME_SEEN_CACHE_SIZE)
(create_filename_seen_cache, clear_filename_seen_cache)
(delete_filename_seen_cache, filename_seen): Delete, parts moved
to filename-seen-cache.h/filename-seen-cache.c.
(output_source_filename, sources_info)
(maybe_add_partial_symtab_filename)
(make_source_files_completion_list): Adjust to use
filename_seen_cache.
2017-07-17 Pedro Alves <palves@redhat.com>
* dwarf2read.c (dwarf2_per_objfile): In-class initialize all

View File

@ -1714,6 +1714,7 @@ COMMON_OBS = $(DEPFILES) $(CONFIG_OBS) $(YYOBJ) \
f-typeprint.o \
f-valprint.o \
fileio.o \
filename-seen-cache.o \
filestuff.o \
filesystem.o \
findcmd.o \

View File

@ -74,7 +74,7 @@
#include "common/gdb_optional.h"
#include "common/underlying.h"
#include "common/byte-vector.h"
#include "filename-seen-cache.h"
#include <fcntl.h>
#include <sys/types.h>
#include <algorithm>
@ -349,6 +349,10 @@ public:
/* Table containing line_header indexed by offset and offset_in_dwz. */
htab_t line_header_hash {};
/* Table containing all filenames. This is an optional because the
table is lazily constructed on first access. */
gdb::optional<filename_seen_cache> filenames_cache;
};
static struct dwarf2_per_objfile *dwarf2_per_objfile;
@ -4312,64 +4316,75 @@ static void
dw2_map_symbol_filenames (struct objfile *objfile, symbol_filename_ftype *fun,
void *data, int need_fullname)
{
int i;
htab_up visited (htab_create_alloc (10, htab_hash_pointer, htab_eq_pointer,
NULL, xcalloc, xfree));
dw2_setup (objfile);
/* The rule is CUs specify all the files, including those used by
any TU, so there's no need to scan TUs here.
We can ignore file names coming from already-expanded CUs. */
for (i = 0; i < dwarf2_per_objfile->n_comp_units; ++i)
if (!dwarf2_per_objfile->filenames_cache)
{
struct dwarf2_per_cu_data *per_cu = dw2_get_cutu (i);
dwarf2_per_objfile->filenames_cache.emplace ();
if (per_cu->v.quick->compunit_symtab)
htab_up visited (htab_create_alloc (10,
htab_hash_pointer, htab_eq_pointer,
NULL, xcalloc, xfree));
/* The rule is CUs specify all the files, including those used
by any TU, so there's no need to scan TUs here. We can
ignore file names coming from already-expanded CUs. */
for (int i = 0; i < dwarf2_per_objfile->n_comp_units; ++i)
{
void **slot = htab_find_slot (visited.get (),
per_cu->v.quick->file_names,
INSERT);
struct dwarf2_per_cu_data *per_cu = dw2_get_cutu (i);
*slot = per_cu->v.quick->file_names;
if (per_cu->v.quick->compunit_symtab)
{
void **slot = htab_find_slot (visited.get (),
per_cu->v.quick->file_names,
INSERT);
*slot = per_cu->v.quick->file_names;
}
}
for (int i = 0; i < dwarf2_per_objfile->n_comp_units; ++i)
{
int j;
struct dwarf2_per_cu_data *per_cu = dw2_get_cu (i);
struct quick_file_names *file_data;
void **slot;
/* We only need to look at symtabs not already expanded. */
if (per_cu->v.quick->compunit_symtab)
continue;
file_data = dw2_get_file_names (per_cu);
if (file_data == NULL)
continue;
slot = htab_find_slot (visited.get (), file_data, INSERT);
if (*slot)
{
/* Already visited. */
continue;
}
*slot = file_data;
for (int j = 0; j < file_data->num_file_names; ++j)
{
const char *filename = file_data->file_names[j];
dwarf2_per_objfile->filenames_cache->seen (filename);
}
}
}
for (i = 0; i < dwarf2_per_objfile->n_comp_units; ++i)
dwarf2_per_objfile->filenames_cache->traverse ([&] (const char *filename)
{
int j;
struct dwarf2_per_cu_data *per_cu = dw2_get_cu (i);
struct quick_file_names *file_data;
void **slot;
const char *this_real_name;
/* We only need to look at symtabs not already expanded. */
if (per_cu->v.quick->compunit_symtab)
continue;
file_data = dw2_get_file_names (per_cu);
if (file_data == NULL)
continue;
slot = htab_find_slot (visited.get (), file_data, INSERT);
if (*slot)
{
/* Already visited. */
continue;
}
*slot = file_data;
for (j = 0; j < file_data->num_file_names; ++j)
{
const char *this_real_name;
if (need_fullname)
this_real_name = dw2_get_real_path (objfile, file_data, j);
else
this_real_name = NULL;
(*fun) (file_data->file_names[j], this_real_name, data);
}
}
if (need_fullname)
this_real_name = gdb_realpath (filename);
else
this_real_name = NULL;
(*fun) (filename, this_real_name, data);
});
}
static int

66
gdb/filename-seen-cache.c Normal file
View File

@ -0,0 +1,66 @@
/* Filename-seen cache for the GNU debugger, GDB.
Copyright (C) 1986-2017 Free Software Foundation, Inc.
This file is part of GDB.
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>. */
#include "defs.h"
#include "filename-seen-cache.h"
#include "filenames.h"
/* Initial size of the table. It automagically grows from here. */
#define INITIAL_FILENAME_SEEN_CACHE_SIZE 100
/* filename_seen_cache constructor. */
filename_seen_cache::filename_seen_cache ()
{
m_tab = htab_create_alloc (INITIAL_FILENAME_SEEN_CACHE_SIZE,
filename_hash, filename_eq,
NULL, xcalloc, xfree);
}
/* See filename-seen-cache.h. */
void
filename_seen_cache::clear ()
{
htab_empty (m_tab);
}
/* See filename-seen-cache.h. */
filename_seen_cache::~filename_seen_cache ()
{
htab_delete (m_tab);
}
/* See filename-seen-cache.h. */
bool
filename_seen_cache::seen (const char *file)
{
void **slot;
/* Is FILE in tab? */
slot = htab_find_slot (m_tab, file, INSERT);
if (*slot != NULL)
return true;
/* No; add it to tab. */
*slot = (char *) file;
return false;
}

63
gdb/filename-seen-cache.h Normal file
View File

@ -0,0 +1,63 @@
/* Filename-seen cache for the GNU debugger, GDB.
Copyright (C) 1986-2017 Free Software Foundation, Inc.
This file is part of GDB.
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>. */
#include "defs.h"
#include "common/function-view.h"
/* Cache to watch for file names already seen. */
class filename_seen_cache
{
public:
filename_seen_cache ();
~filename_seen_cache ();
/* Disable copy. */
filename_seen_cache (const filename_seen_cache &) = delete;
void operator= (const filename_seen_cache &) = delete;
/* Empty the cache, but do not delete it. */
void clear ();
/* If FILE is not already in the table of files in CACHE, add it and
return false; otherwise return true.
NOTE: We don't manage space for FILE, we assume FILE lives as
long as the caller needs. */
bool seen (const char *file);
/* Traverse all cache entries, calling CALLBACK on each. The
filename is passed as argument to CALLBACK. */
void traverse (gdb::function_view<void (const char *filename)> callback)
{
auto erased_cb = [] (void **slot, void *info) -> int
{
auto filename = (const char *) *slot;
auto restored_cb = (decltype (callback) *) info;
(*restored_cb) (filename);
return 1;
};
htab_traverse_noresize (m_tab, erased_cb, &callback);
}
private:
/* Table of files seen so far. */
htab_t m_tab;
};

View File

@ -63,6 +63,7 @@
#include "completer.h"
#include "progspace-and-thread.h"
#include "common/gdb_optional.h"
#include "filename-seen-cache.h"
/* Forward declarations for local functions. */
@ -3968,74 +3969,6 @@ operator_chars (const char *p, const char **end)
}
/* Cache to watch for file names already seen by filename_seen. */
struct filename_seen_cache
{
/* Table of files seen so far. */
htab_t tab;
/* Initial size of the table. It automagically grows from here. */
#define INITIAL_FILENAME_SEEN_CACHE_SIZE 100
};
/* filename_seen_cache constructor. */
static struct filename_seen_cache *
create_filename_seen_cache (void)
{
struct filename_seen_cache *cache = XNEW (struct filename_seen_cache);
cache->tab = htab_create_alloc (INITIAL_FILENAME_SEEN_CACHE_SIZE,
filename_hash, filename_eq,
NULL, xcalloc, xfree);
return cache;
}
/* Empty the cache, but do not delete it. */
static void
clear_filename_seen_cache (struct filename_seen_cache *cache)
{
htab_empty (cache->tab);
}
/* filename_seen_cache destructor.
This takes a void * argument as it is generally used as a cleanup. */
static void
delete_filename_seen_cache (void *ptr)
{
struct filename_seen_cache *cache = (struct filename_seen_cache *) ptr;
htab_delete (cache->tab);
xfree (cache);
}
/* If FILE is not already in the table of files in CACHE, return zero;
otherwise return non-zero. Optionally add FILE to the table if ADD
is non-zero.
NOTE: We don't manage space for FILE, we assume FILE lives as long
as the caller needs. */
static int
filename_seen (struct filename_seen_cache *cache, const char *file, int add)
{
void **slot;
/* Is FILE in tab? */
slot = htab_find_slot (cache->tab, file, add ? INSERT : NO_INSERT);
if (*slot != NULL)
return 1;
/* No; maybe add it to tab. */
if (add)
*slot = (char *) file;
return 0;
}
/* Data structure to maintain printing state for output_source_filename. */
struct output_source_filename_data
@ -4065,7 +3998,7 @@ output_source_filename (const char *name,
symtabs; it doesn't hurt to check. */
/* Was NAME already seen? */
if (filename_seen (data->filename_seen_cache, name, 1))
if (data->filename_seen_cache->seen (name))
{
/* Yes; don't print it again. */
return;
@ -4097,16 +4030,15 @@ sources_info (char *ignore, int from_tty)
struct symtab *s;
struct objfile *objfile;
struct output_source_filename_data data;
struct cleanup *cleanups;
if (!have_full_symbols () && !have_partial_symbols ())
{
error (_("No symbol table is loaded. Use the \"file\" command."));
}
data.filename_seen_cache = create_filename_seen_cache ();
cleanups = make_cleanup (delete_filename_seen_cache,
data.filename_seen_cache);
filename_seen_cache filenames_seen;
data.filename_seen_cache = &filenames_seen;
printf_filtered ("Source files for which symbols have been read in:\n\n");
@ -4122,13 +4054,11 @@ sources_info (char *ignore, int from_tty)
printf_filtered ("Source files for which symbols "
"will be read in on demand:\n\n");
clear_filename_seen_cache (data.filename_seen_cache);
filenames_seen.clear ();
data.first = 1;
map_symbol_filenames (output_partial_symbol_filename, &data,
1 /*need_fullname*/);
printf_filtered ("\n");
do_cleanups (cleanups);
}
/* Compare FILE against all the NFILES entries of FILES. If BASENAMES is
@ -5552,7 +5482,7 @@ maybe_add_partial_symtab_filename (const char *filename, const char *fullname,
if (not_interesting_fname (filename))
return;
if (!filename_seen (data->filename_seen_cache, filename, 1)
if (!data->filename_seen_cache->seen (filename)
&& filename_ncmp (filename, data->text, data->text_len) == 0)
{
/* This file matches for a completion; add it to the
@ -5564,7 +5494,7 @@ maybe_add_partial_symtab_filename (const char *filename, const char *fullname,
const char *base_name = lbasename (filename);
if (base_name != filename
&& !filename_seen (data->filename_seen_cache, base_name, 1)
&& !data->filename_seen_cache->seen (base_name)
&& filename_ncmp (base_name, data->text, data->text_len) == 0)
add_filename_to_list (base_name, data->text, data->word, data->list);
}
@ -5585,23 +5515,20 @@ make_source_files_completion_list (const char *text, const char *word)
VEC (char_ptr) *list = NULL;
const char *base_name;
struct add_partial_filename_data datum;
struct filename_seen_cache *filename_seen_cache;
struct cleanup *back_to, *cache_cleanup;
struct cleanup *back_to;
if (!have_full_symbols () && !have_partial_symbols ())
return list;
back_to = make_cleanup (do_free_completion_list, &list);
filename_seen_cache = create_filename_seen_cache ();
cache_cleanup = make_cleanup (delete_filename_seen_cache,
filename_seen_cache);
filename_seen_cache filenames_seen;
ALL_FILETABS (objfile, cu, s)
{
if (not_interesting_fname (s->filename))
continue;
if (!filename_seen (filename_seen_cache, s->filename, 1)
if (!filenames_seen.seen (s->filename)
&& filename_ncmp (s->filename, text, text_len) == 0)
{
/* This file matches for a completion; add it to the current
@ -5616,13 +5543,13 @@ make_source_files_completion_list (const char *text, const char *word)
command do when they parse file names. */
base_name = lbasename (s->filename);
if (base_name != s->filename
&& !filename_seen (filename_seen_cache, base_name, 1)
&& !filenames_seen.seen (base_name)
&& filename_ncmp (base_name, text, text_len) == 0)
add_filename_to_list (base_name, text, word, &list);
}
}
datum.filename_seen_cache = filename_seen_cache;
datum.filename_seen_cache = &filenames_seen;
datum.text = text;
datum.word = word;
datum.text_len = text_len;
@ -5630,7 +5557,6 @@ make_source_files_completion_list (const char *text, const char *word)
map_symbol_filenames (maybe_add_partial_symtab_filename, &datum,
0 /*need_fullname*/);
do_cleanups (cache_cleanup);
discard_cleanups (back_to);
return list;