ld: Add publics stream to PDB files

This commit is contained in:
Mark Harmstone 2022-10-31 00:15:54 +00:00 committed by Alan Modra
parent a726722240
commit 0882710510
6 changed files with 560 additions and 9 deletions

347
ld/pdb.c
View File

@ -383,10 +383,31 @@ get_arch_number (bfd *abfd)
return IMAGE_FILE_MACHINE_I386;
}
/* Work out the 1-based number of SECT among ABFD's sections.  Sections
   of zero size are not counted, as they are not written to the image.
   Returns 0 if SECT is not found in ABFD's section list.  */

static uint16_t
find_section_number (bfd *abfd, asection *sect)
{
  uint16_t number = 1;
  asection *cur = abfd->sections;

  while (cur && cur != sect)
    {
      /* Only sections with contents advance the numbering.  */
      if (cur->size != 0)
        number++;

      cur = cur->next;
    }

  /* CUR is NULL here only if we ran off the end without a match.  */
  return cur ? number : 0;
}
/* Stream 4 is the debug information (DBI) stream. */
static bool
populate_dbi_stream (bfd *stream, bfd *abfd,
uint16_t section_header_stream_num)
uint16_t section_header_stream_num,
uint16_t sym_rec_stream_num,
uint16_t publics_stream_num)
{
struct pdb_dbi_stream_header h;
struct optional_dbg_header opt;
@ -396,9 +417,9 @@ populate_dbi_stream (bfd *stream, bfd *abfd,
bfd_putl32 (1, &h.age);
bfd_putl16 (0xffff, &h.global_stream_index);
bfd_putl16 (0x8e1d, &h.build_number); // MSVC 14.29
bfd_putl16 (0xffff, &h.public_stream_index);
bfd_putl16 (publics_stream_num, &h.public_stream_index);
bfd_putl16 (0, &h.pdb_dll_version);
bfd_putl16 (0xffff, &h.sym_record_stream);
bfd_putl16 (sym_rec_stream_num, &h.sym_record_stream);
bfd_putl16 (0, &h.pdb_dll_rbld);
bfd_putl32 (0, &h.mod_info_size);
bfd_putl32 (0, &h.section_contribution_size);
@ -433,6 +454,293 @@ populate_dbi_stream (bfd *stream, bfd *abfd,
return true;
}
/* qsort callback.  S1 and S2 each point at a `struct public *' array
   element; order them by ascending hash value.  */

static int
public_compare_hash (const void *s1, const void *s2)
{
  const struct public *const *lhs = s1;
  const struct public *const *rhs = s2;

  /* Yields -1, 0 or 1 without risking overflow from a subtraction.  */
  return ((*lhs)->hash > (*rhs)->hash) - ((*lhs)->hash < (*rhs)->hash);
}
/* qsort callback.  S1 and S2 each point at a `struct public *' array
   element; order them by section number first, and by address within
   the same section second.  */

static int
public_compare_addr (const void *s1, const void *s2)
{
  const struct public *a = *(const struct public *const *) s1;
  const struct public *b = *(const struct public *const *) s2;

  if (a->section != b->section)
    return a->section < b->section ? -1 : 1;

  if (a->address != b->address)
    return a->address < b->address ? -1 : 1;

  return 0;
}
/* The publics stream is a hash map of S_PUB32 records, which are stored
   in the symbol record stream.  Each S_PUB32 entry represents a symbol
   from the point of view of the linker: a section index, an offset within
   the section, and a mangled name.  Compare with S_GDATA32 and S_GPROC32,
   which are the same thing but generated by the compiler.

   STREAM receives the publics stream itself, SYM_REC_STREAM receives the
   S_PUB32 records, and ABFD is the output image whose input BFDs are
   scanned for global symbols.  Returns true on success, or false as soon
   as any write to either stream comes up short.  */

static bool
populate_publics_stream (bfd *stream, bfd *abfd, bfd *sym_rec_stream)
{
  struct publics_header header;
  struct globals_hash_header hash_header;
  const unsigned int num_buckets = 4096;
  unsigned int num_entries = 0, filled_buckets = 0;
  unsigned int buckets_size, sym_hash_size;
  char int_buf[sizeof (uint32_t)];
  struct public *publics_head = NULL, *publics_tail = NULL;
  struct public **buckets;
  struct public **sorted = NULL;
  bool ret = false;

  buckets = xmalloc (sizeof (struct public *) * num_buckets);
  memset (buckets, 0, sizeof (struct public *) * num_buckets);

  /* Loop through the global symbols in our input files, and write S_PUB32
     records in the symbol record stream for those that make it into the
     final image.  */
  for (bfd *in = coff_data (abfd)->link_info->input_bfds; in;
       in = in->link.next)
    {
      for (unsigned int i = 0; i < in->symcount; i++)
        {
          struct bfd_symbol *sym = in->outsymbols[i];

          if (sym->flags & BSF_GLOBAL)
            {
              struct pubsym ps;
              uint16_t record_length;
              const char *name = sym->name;
              size_t name_len = strlen (name);
              struct public *p;
              unsigned int padding = 0;
              uint16_t section;
              uint32_t flags = 0;

              section =
                find_section_number (abfd, sym->section->output_section);

              /* Symbols whose section is not in the output get no record.
                 Check this before allocating, so that nothing is leaked
                 for skipped symbols.  */
              if (section == 0)
                continue;

              p = xmalloc (sizeof *p);

              p->next = NULL;
              p->offset = bfd_tell (sym_rec_stream);
              p->hash = calc_hash (name, name_len) % num_buckets;
              p->section = section;
              p->address = sym->section->output_offset + sym->value;

              /* Link P into the list straight away: the cleanup code at
                 `end' walks this list, so P is freed even if one of the
                 writes below fails.  */
              if (!publics_head)
                publics_head = p;
              else
                publics_tail->next = p;

              publics_tail = p;
              num_entries++;

              /* NOTE(review): record_length is 16 bits wide, so a very
                 long symbol name would wrap here -- confirm whether
                 that can occur in practice.  */
              record_length = sizeof (struct pubsym) + name_len + 1;

              /* Records are padded to a multiple of four bytes.  */
              if (record_length % 4)
                padding = 4 - (record_length % 4);

              /* Assume that all global symbols in executable sections
                 are functions.  */
              if (sym->section->flags & SEC_CODE)
                flags = PUBSYM_FUNCTION;

              /* The stored length does not count the length field
                 itself.  */
              bfd_putl16 (record_length + padding - sizeof (uint16_t),
                          &ps.record_length);
              bfd_putl16 (S_PUB32, &ps.record_type);
              bfd_putl32 (flags, &ps.flags);
              bfd_putl32 (p->address, &ps.offset);
              bfd_putl16 (p->section, &ps.section);

              if (bfd_bwrite (&ps, sizeof (struct pubsym), sym_rec_stream) !=
                  sizeof (struct pubsym))
                goto end;

              if (bfd_bwrite (name, name_len + 1, sym_rec_stream) !=
                  name_len + 1)
                goto end;

              for (unsigned int j = 0; j < padding; j++)
                {
                  uint8_t b = 0;

                  if (bfd_bwrite (&b, sizeof (uint8_t), sym_rec_stream) !=
                      sizeof (uint8_t))
                    goto end;
                }
            }
        }
    }

  if (num_entries > 0)
    {
      /* Create an array of pointers, sorted by hash value.  */

      sorted = xmalloc (sizeof (struct public *) * num_entries);

      struct public *p = publics_head;
      for (unsigned int i = 0; i < num_entries; i++)
        {
          sorted[i] = p;
          p = p->next;
        }

      qsort (sorted, num_entries, sizeof (struct public *),
             public_compare_hash);

      /* Populate the buckets.  Each bucket points at the first entry
         in hash order that falls into it.  */

      for (unsigned int i = 0; i < num_entries; i++)
        {
          if (!buckets[sorted[i]->hash])
            {
              buckets[sorted[i]->hash] = sorted[i];
              filled_buckets++;
            }

          sorted[i]->index = i;
        }
    }

  /* Bucket data is a one-bit-per-bucket bitmap, a 4-byte gap, and one
     32-bit offset for each filled bucket.  */
  buckets_size = num_buckets / 8;
  buckets_size += sizeof (uint32_t);
  buckets_size += filled_buckets * sizeof (uint32_t);

  sym_hash_size = sizeof (hash_header);
  sym_hash_size += num_entries * sizeof (struct hash_record);
  sym_hash_size += buckets_size;

  /* Output the publics header.  */

  bfd_putl32 (sym_hash_size, &header.sym_hash_size);
  bfd_putl32 (num_entries * sizeof (uint32_t), &header.addr_map_size);
  bfd_putl32 (0, &header.num_thunks);
  bfd_putl32 (0, &header.thunks_size);
  bfd_putl32 (0, &header.thunk_table);
  bfd_putl32 (0, &header.thunk_table_offset);
  bfd_putl32 (0, &header.num_sects);

  if (bfd_bwrite (&header, sizeof (header), stream) != sizeof (header))
    goto end;

  /* Output the global hash header.  */

  bfd_putl32 (GLOBALS_HASH_SIGNATURE, &hash_header.signature);
  bfd_putl32 (GLOBALS_HASH_VERSION_70, &hash_header.version);
  bfd_putl32 (num_entries * sizeof (struct hash_record),
              &hash_header.entries_size);
  bfd_putl32 (buckets_size, &hash_header.buckets_size);

  if (bfd_bwrite (&hash_header, sizeof (hash_header), stream) !=
      sizeof (hash_header))
    goto end;

  /* Write the entries in hash order.  */

  for (unsigned int i = 0; i < num_entries; i++)
    {
      struct hash_record hr;

      bfd_putl32 (sorted[i]->offset + 1, &hr.offset);
      bfd_putl32 (1, &hr.reference);

      if (bfd_bwrite (&hr, sizeof (hr), stream) != sizeof (hr))
        goto end;
    }

  /* Write the bitmap for filled and unfilled buckets.  */

  for (unsigned int i = 0; i < num_buckets; i += 8)
    {
      uint8_t v = 0;

      for (unsigned int j = 0; j < 8; j++)
        {
          if (buckets[i + j])
            v |= 1 << j;
        }

      if (bfd_bwrite (&v, sizeof (v), stream) != sizeof (v))
        goto end;
    }

  /* Add a 4-byte gap.  */

  bfd_putl32 (0, int_buf);

  if (bfd_bwrite (int_buf, sizeof (uint32_t), stream) != sizeof (uint32_t))
    goto end;

  /* Write the bucket offsets.  */

  for (unsigned int i = 0; i < num_buckets; i++)
    {
      if (buckets[i])
        {
          /* 0xc is size of internal hash_record structure in
             Microsoft's parser.  */
          bfd_putl32 (buckets[i]->index * 0xc, int_buf);

          if (bfd_bwrite (int_buf, sizeof (uint32_t), stream) !=
              sizeof (uint32_t))
            goto end;
        }
    }

  /* Write the address map: offsets into the symbol record stream of
     S_PUB32 records, ordered by address.  */

  if (num_entries > 0)
    {
      qsort (sorted, num_entries, sizeof (struct public *),
             public_compare_addr);

      for (unsigned int i = 0; i < num_entries; i++)
        {
          bfd_putl32 (sorted[i]->offset, int_buf);

          if (bfd_bwrite (int_buf, sizeof (uint32_t), stream) !=
              sizeof (uint32_t))
            goto end;
        }
    }

  ret = true;

end:
  free (buckets);

  /* Every node ever allocated is on this list, so this frees them all.  */
  while (publics_head)
    {
      struct public *p = publics_head->next;

      free (publics_head);
      publics_head = p;
    }

  free (sorted);

  return ret;
}
/* The section header stream contains a copy of the section headers
from the PE file, in the same format. */
static bool
@ -494,8 +802,9 @@ create_pdb_file (bfd *abfd, const char *pdb_name, const unsigned char *guid)
{
bfd *pdb;
bool ret = false;
bfd *info_stream, *dbi_stream, *names_stream;
uint16_t section_header_stream_num;
bfd *info_stream, *dbi_stream, *names_stream, *sym_rec_stream,
*publics_stream;
uint16_t section_header_stream_num, sym_rec_stream_num, publics_stream_num;
pdb = bfd_openw (pdb_name, "pdb");
if (!pdb)
@ -554,6 +863,24 @@ create_pdb_file (bfd *abfd, const char *pdb_name, const unsigned char *guid)
goto end;
}
sym_rec_stream = add_stream (pdb, NULL, &sym_rec_stream_num);
if (!sym_rec_stream)
{
einfo (_("%P: warning: cannot create symbol record stream "
"in PDB file: %E\n"));
goto end;
}
publics_stream = add_stream (pdb, NULL, &publics_stream_num);
if (!publics_stream)
{
einfo (_("%P: warning: cannot create publics stream "
"in PDB file: %E\n"));
goto end;
}
if (!create_section_header_stream (pdb, abfd, &section_header_stream_num))
{
einfo (_("%P: warning: cannot create section header stream "
@ -561,13 +888,21 @@ create_pdb_file (bfd *abfd, const char *pdb_name, const unsigned char *guid)
goto end;
}
if (!populate_dbi_stream (dbi_stream, abfd, section_header_stream_num))
if (!populate_dbi_stream (dbi_stream, abfd, section_header_stream_num,
sym_rec_stream_num, publics_stream_num))
{
einfo (_("%P: warning: cannot populate DBI stream "
"in PDB file: %E\n"));
goto end;
}
if (!populate_publics_stream (publics_stream, abfd, sym_rec_stream))
{
einfo (_("%P: warning: cannot populate publics stream "
"in PDB file: %E\n"));
goto end;
}
if (!populate_info_stream (pdb, info_stream, guid))
{
einfo (_("%P: warning: cannot populate info stream "

View File

@ -28,6 +28,8 @@
#include "bfd.h"
#include <stdbool.h>
#define S_PUB32 0x110e
/* PDBStream70 in pdb1.h */
struct pdb_stream_70
{
@ -91,6 +93,51 @@ struct pdb_dbi_stream_header
#define DBI_STREAM_VERSION_70 19990903
/* PSGSIHDR in gsi.h */
/* Header written at the start of the publics stream by
   populate_publics_stream in ld/pdb.c.  Fields are stored
   little-endian (written with bfd_putl32).  */
struct publics_header
{
/* Byte size of the hash data that follows this header: the
   globals_hash_header, the hash records and the bucket data.  */
uint32_t sym_hash_size;
/* Byte size of the address map: one 32-bit offset per public symbol.  */
uint32_t addr_map_size;
/* The remaining fields are currently always written as 0 by ld.  */
uint32_t num_thunks;
uint32_t thunks_size;
uint32_t thunk_table;
uint32_t thunk_table_offset;
uint32_t num_sects;
};
/* GSIHashHdr in gsi.h */
/* Header of the symbol hash table; ld writes it in the publics stream
   directly after struct publics_header.  */
struct globals_hash_header
{
/* Written as GLOBALS_HASH_SIGNATURE.  */
uint32_t signature;
/* Written as GLOBALS_HASH_VERSION_70.  */
uint32_t version;
/* Byte size of the array of struct hash_record that follows.  */
uint32_t entries_size;
/* Byte size of the bucket data after the records: the bucket bitmap,
   a 4-byte gap, and one 32-bit offset per filled bucket.  */
uint32_t buckets_size;
};
/* HRFile in gsi.h */
/* One entry in the symbol hash table, as stored in the file.  */
struct hash_record
{
/* Offset of the symbol's record in the symbol record stream, plus 1.  */
uint32_t offset;
/* ld always writes 1 here.  */
uint32_t reference;
};
#define GLOBALS_HASH_SIGNATURE 0xffffffff
#define GLOBALS_HASH_VERSION_70 0xf12f091a
/* PUBSYM32 in cvinfo.h */
/* Fixed-size part of an S_PUB32 record in the symbol record stream.  */
struct pubsym
{
/* Length of the record, including the name and any padding, but not
   counting this length field itself.  */
uint16_t record_length;
/* Written as S_PUB32.  */
uint16_t record_type;
/* CV_PUBSYMFLAGS bits; ld sets PUBSYM_FUNCTION for symbols in code
   sections and 0 otherwise.  */
uint32_t flags;
/* Offset of the symbol within its output section.  */
uint32_t offset;
/* Number of the output section, starting at 1.  */
uint16_t section;
/* followed by null-terminated string */
} ATTRIBUTE_PACKED;
/* see bitset CV_PUBSYMFLAGS in cvinfo.h */
#define PUBSYM_FUNCTION 0x2
struct optional_dbg_header
{
uint16_t fpo_stream;

View File

@ -395,12 +395,111 @@ proc check_section_stream { img pdb } {
return 1
}
# Return the publics stream index recorded in PDB file $pdb, or -1 if
# stream 0003 could not be extracted.  The index is read as a 16-bit
# little-endian value at byte offset 16 of stream 0003 (presumably the
# public_stream_index field of the DBI header -- confirm against
# struct pdb_dbi_stream_header).
proc get_publics_stream_index { pdb } {
    global ar

    # Any output at all from ar is treated as a failure.
    set ar_output [run_host_cmd "$ar" "x --output tmpdir $pdb 0003"]
    if { [string length $ar_output] > 0 } {
        return -1
    }

    set stream [open tmpdir/0003]
    fconfigure $stream -translation binary

    # Skip ahead to the field and read it as a 16-bit integer.
    seek $stream 16
    binary scan [read $stream 2] s stream_index

    close $stream

    return $stream_index
}
# Return the symbol record stream index recorded in PDB file $pdb, or -1
# if stream 0003 could not be extracted.  The index is read as a 16-bit
# little-endian value at byte offset 20 of stream 0003 (presumably the
# sym_record_stream field of the DBI header -- confirm against
# struct pdb_dbi_stream_header).
proc get_sym_record_stream_index { pdb } {
    global ar

    # Any output at all from ar is treated as a failure.
    set ar_output [run_host_cmd "$ar" "x --output tmpdir $pdb 0003"]
    if { [string length $ar_output] > 0 } {
        return -1
    }

    set stream [open tmpdir/0003]
    fconfigure $stream -translation binary

    # Skip ahead to the field and read it as a 16-bit integer.
    seek $stream 20
    binary scan [read $stream 2] s stream_index

    close $stream

    return $stream_index
}
# Check the publics stream and the symbol record stream of PDB file $pdb
# against the expected dumps pdb1-publics.d and pdb1-sym-record.d.
# Each stream is extracted with ar, dumped with objdump, and the dump is
# matched against the expected file, whose contents are used as a glob
# pattern.  Returns 1 if both streams match, 0 on any failure.
proc check_publics_stream { pdb } {
    global ar
    global objdump
    global srcdir
    global subdir

    # The publics stream is checked first, then the symbol records.
    foreach { index_proc expected_file } {
        get_publics_stream_index    pdb1-publics.d
        get_sym_record_stream_index pdb1-sym-record.d
    } {
        set stream_index [$index_proc $pdb]
        if { $stream_index == -1 } {
            return 0
        }

        # Streams are extracted under their four-digit hex name.
        set stream_name [format "%04x" $stream_index]

        set ar_output [run_host_cmd "$ar" "x --output tmpdir $pdb $stream_name"]
        if { [string length $ar_output] > 0 } {
            return 0
        }

        set expected [file_contents "$srcdir/$subdir/$expected_file"]
        set actual [run_host_cmd "$objdump" "-s --target=binary tmpdir/$stream_name"]

        if { ![string match $expected $actual] } {
            return 0
        }
    }

    return 1
}
if ![ld_assemble $as $srcdir/$subdir/pdb1.s tmpdir/pdb1.o] {
unsupported "Build pdb1.o"
return
}
if ![ld_link $ld "tmpdir/pdb1.exe" "--pdb=tmpdir/pdb1.pdb tmpdir/pdb1.o"] {
if ![ld_link $ld "tmpdir/pdb1.exe" "--pdb=tmpdir/pdb1.pdb --gc-sections -e foo tmpdir/pdb1.o"] {
fail "Could not create a PE image with a PDB file"
return
}
@ -441,3 +540,9 @@ if [check_section_stream tmpdir/pdb1.exe tmpdir/pdb1.pdb] {
} else {
fail "Invalid section stream"
}
if [check_publics_stream tmpdir/pdb1.pdb] {
pass "Valid publics stream"
} else {
fail "Invalid publics stream"
}

View File

@ -0,0 +1,41 @@
*: file format binary
Contents of section .data:
0000 2c020000 08000000 00000000 00000000 ,...............
0010 00000000 00000000 00000000 ffffffff ................
0020 1a092ff1 10000000 0c020000 15000000 ../.............
0030 01000000 01000000 01000000 00000000 ................
0040 00000000 00000000 00000000 00000000 ................
0050 00000000 00000000 00000000 00000000 ................
0060 00000000 00000000 00000000 00000000 ................
0070 00000000 00000000 00000000 00000000 ................
0080 00000000 00000000 00000000 00000000 ................
0090 00000000 00000000 00000000 00000000 ................
00a0 00000000 00000000 00000000 00000000 ................
00b0 00000000 00000000 00000000 00000000 ................
00c0 00000000 00000000 00000000 00000000 ................
00d0 00000000 00000000 00000000 00000001 ................
00e0 00000000 00000000 00000000 00000000 ................
00f0 00000000 00000000 00000000 00000000 ................
0100 00000000 00000000 00000000 00000000 ................
0110 00000000 00000000 00000000 00000000 ................
0120 00000000 00000000 00000000 00000000 ................
0130 00000000 00000000 00000000 00000000 ................
0140 00000000 00000000 00000000 00000000 ................
0150 00000000 00000000 00000000 00000000 ................
0160 00000000 00000000 00000000 00000000 ................
0170 00000000 00000000 00000000 00000000 ................
0180 00000000 00000000 00000000 00000000 ................
0190 00000000 00000000 00000000 01000000 ................
01a0 00000000 00000000 00000000 00000000 ................
01b0 00000000 00000000 00000000 00000000 ................
01c0 00000000 00000000 00000000 00000000 ................
01d0 00000000 00000000 00000000 00000000 ................
01e0 00000000 00000000 00000000 00000000 ................
01f0 00000000 00000000 00000000 00000000 ................
0200 00000000 00000000 00000000 00000000 ................
0210 00000000 00000000 00000000 00000000 ................
0220 00000000 00000000 00000000 00000000 ................
0230 00000000 00000000 00000000 00000000 ................
0240 00000000 0c000000 00000000 14000000 ................

View File

@ -0,0 +1,7 @@
*: file format binary
Contents of section .data:
0000 12000e11 02000000 08000000 0100666f ..............fo
0010 6f000000 12000e11 00000000 04000000 o...............
0020 02006261 72000000 ..bar...

View File

@ -1,5 +1,21 @@
.text
.global foo
foo:
.long 0x12345678
.long 0x9abcdef0
.global foo
foo: # section 0001, offset 00000008
.secrel32 bar
.data
.long 0x12345678
.global bar
bar: # section 0002, offset 00000004
.long 0x9abcdef0
.section "gcsect"
.global baz
baz: # unreferenced, will be GC'd out
.long 0x12345678