Relax NoteAny parsing to allow notes with names that contain invalid utf8 sequences

We want NoteAny to be a catch-all note type that can represent any valid note that we don't know how to parse into something more specific. The ELF spec only claims: "The first namesz bytes in name contain a null-terminated character representation of the entry's owner or originator.", and while it does say "null-terminated character representaion", it does not specify that those bytes must be valid UTF8, so we were being unnecessarily strict in forcing all parsed notes to have valid utf8 names. All the standard note types I know about use simple ascii sequences.
2023-06-06 11:34:54 -07:00 · 2023-06-06 11:34:54 -07:00 · d29e9964dc
commit d29e9964dc
parent 10957838b8
2 changed files with 32 additions and 10 deletions
--- a/src/abi.rs
+++ b/src/abi.rs
@ -787,7 +787,7 @@ pub const STB_LOPROC: u8 = 13;
 /// for processor-specific semantics.
 pub const STB_HIPROC: u8 = 15;

-/// STV_* define constants for the ELF Symbol's st_visibility (encoded in the st_other field).
+// STV_* define constants for the ELF Symbol's st_visibility (encoded in the st_other field).

 /// The visibility of symbols with the STV_DEFAULT attribute is as specified by
 /// the symbol's binding type.  That is, global and weak symbols are visible
@ -1148,7 +1148,7 @@ pub const ELFCOMPRESS_LOPROC: u32 = 0x70000000;
 pub const ELFCOMPRESS_HIPROC: u32 = 0x7fffffff;

 /// GNU-extension notes have this name
-pub const ELF_NOTE_GNU: &str = "GNU";
+pub const ELF_NOTE_GNU: &[u8] = b"GNU\0";

 // Note header descriptor types constants (n_type)

--- a/src/note.rs
+++ b/src/note.rs
@ -36,7 +36,6 @@ use crate::endian::EndianParse;
 use crate::file::Class;
 use crate::parse::{ParseAt, ParseError, ReadBytesExt};
 use core::mem::size_of;
-use core::str::from_utf8;

 /// This enum contains parsed Note variants which can be matched on
 #[derive(Debug, PartialEq, Eq)]
@ -68,12 +67,11 @@ impl<'data> Note<'data> {
        let nhdr = NoteHeader::parse_at(endian, Class::ELF32, offset, data)?;

        let name_start = *offset;
-        let name_size: usize = nhdr.n_namesz.saturating_sub(1).try_into()?;
+        let name_size: usize = nhdr.n_namesz.try_into()?;
        let name_end = name_start
            .checked_add(name_size)
            .ok_or(ParseError::IntegerOverflow)?;
-        let name_buf = data.get_bytes(name_start..name_end)?;
-        let name = from_utf8(name_buf)?;
+        let name = data.get_bytes(name_start..name_end)?;
        *offset = name_end;

        // skip over padding if needed to get back to 4-byte alignment
@ -173,7 +171,7 @@ pub struct NoteGnuBuildId<'data>(pub &'data [u8]);
 #[derive(Debug, PartialEq, Eq)]
 pub struct NoteAny<'data> {
    pub n_type: u64,
-    pub name: &'data str,
+    pub name: &'data [u8],
    pub desc: &'data [u8],
 }

@ -320,6 +318,7 @@ mod parse_tests {
        Note::parse_at(LittleEndian, Class::ELF64, 0, &mut offset, &data)
            .expect_err("Should have gotten an alignment error");
    }
+
    #[test]
    fn parse_note_with_8_byte_alignment() {
        // This is a .note.gnu.property section, which has been seen generated with 8-byte alignment
@ -395,7 +394,7 @@ mod parse_tests {
            note,
            Note::Unknown(NoteAny {
                n_type: 6,
-                name: "",
+                name: &[],
                desc: &[0x20, 0x0],
            })
        );
@ -419,7 +418,7 @@ mod parse_tests {
            note,
            Note::Unknown(NoteAny {
                n_type: 1,
-                name: "GN",
+                name: b"GN\0",
                desc: &[0x01, 0x02, 0x03, 0x04],
            })
        );
@ -466,7 +465,7 @@ mod parse_tests {
            note,
            Note::Unknown(NoteAny {
                n_type: 0x42,
-                name: "",
+                name: &[],
                desc: &[0x20, 0x0],
            })
        );
@ -496,6 +495,29 @@ mod parse_tests {
        assert_eq!(offset, 16);
    }

+    #[test]
+    fn parse_note_any_with_invalid_utf8_name() {
+        let data = [
+            0x04, 0x00, 0x00, 0x00, // namesz 4
+            0x00, 0x00, 0x00, 0x00, // descsz 0
+            0x42, 0x00, 0x00, 0x00, // type 42 (unknown)
+            0x47, 0xc3, 0x28, 0x00, // name G..\0 (dots are an invalid utf8 sequence)
+        ];
+
+        let mut offset = 0;
+        let note = Note::parse_at(LittleEndian, Class::ELF32, 4, &mut offset, &data)
+            .expect("Failed to parse");
+        assert_eq!(
+            note,
+            Note::Unknown(NoteAny {
+                n_type: 0x42,
+                name: &[0x47, 0xc3, 0x28, 0x0],
+                desc: &[],
+            })
+        );
+        assert_eq!(offset, 16);
+    }
+
    use crate::parse::{test_parse_for, test_parse_fuzz_too_short};

    #[test]