// Copyright 2013 The Go Authors. All rights reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. // Package goobj implements reading of Go object files and archives. // // TODO(rsc): Decide where this package should live. (golang.org/issue/6932) // TODO(rsc): Decide the appropriate integer types for various fields. // TODO(rsc): Write tests. (File format still up in the air a little.) package goobj import ( "bufio" "bytes" "errors" "fmt" "io" "strconv" "strings" ) // A SymKind describes the kind of memory represented by a symbol. type SymKind int // This list is taken from include/link.h. // Defined SymKind values. // TODO(rsc): Give idiomatic Go names. // TODO(rsc): Reduce the number of symbol types in the object files. const ( _ SymKind = iota // readonly, executable STEXT SELFRXSECT // readonly, non-executable STYPE SSTRING SGOSTRING SGOFUNC SRODATA SFUNCTAB STYPELINK SSYMTAB // TODO: move to unmapped section SPCLNTAB SELFROSECT // writable, non-executable SMACHOPLT SELFSECT SMACHO // Mach-O __nl_symbol_ptr SMACHOGOT SNOPTRDATA SINITARR SDATA SWINDOWS SBSS SNOPTRBSS STLSBSS // not mapped SXREF SMACHOSYMSTR SMACHOSYMTAB SMACHOINDIRECTPLT SMACHOINDIRECTGOT SFILE SFILEPATH SCONST SDYNIMPORT SHOSTOBJ ) var symKindStrings = []string{ SBSS: "SBSS", SCONST: "SCONST", SDATA: "SDATA", SDYNIMPORT: "SDYNIMPORT", SELFROSECT: "SELFROSECT", SELFRXSECT: "SELFRXSECT", SELFSECT: "SELFSECT", SFILE: "SFILE", SFILEPATH: "SFILEPATH", SFUNCTAB: "SFUNCTAB", SGOFUNC: "SGOFUNC", SGOSTRING: "SGOSTRING", SHOSTOBJ: "SHOSTOBJ", SINITARR: "SINITARR", SMACHO: "SMACHO", SMACHOGOT: "SMACHOGOT", SMACHOINDIRECTGOT: "SMACHOINDIRECTGOT", SMACHOINDIRECTPLT: "SMACHOINDIRECTPLT", SMACHOPLT: "SMACHOPLT", SMACHOSYMSTR: "SMACHOSYMSTR", SMACHOSYMTAB: "SMACHOSYMTAB", SNOPTRBSS: "SNOPTRBSS", SNOPTRDATA: "SNOPTRDATA", SPCLNTAB: "SPCLNTAB", SRODATA: "SRODATA", SSTRING: "SSTRING", SSYMTAB: "SSYMTAB", STEXT: "STEXT", STLSBSS: "STLSBSS", STYPE: "STYPE", STYPELINK: "STYPELINK", SWINDOWS: "SWINDOWS", SXREF: "SXREF", } func (k SymKind) String() string { if k < 0 || int(k) >= len(symKindStrings) { return fmt.Sprintf("SymKind(%d)", k) } return symKindStrings[k] } // A Sym is a named symbol in an object file. type Sym struct { SymID // symbol identifier (name and version) Kind SymKind // kind of symbol DupOK bool // are duplicate definitions okay? Size int // size of corresponding data Type SymID // symbol for Go type information Data Data // memory image of symbol Reloc []Reloc // relocations to apply to Data Func *Func // additional data for functions } // A SymID - the combination of Name and Version - uniquely identifies // a symbol within a package. type SymID struct { // Name is the name of a symbol. Name string // Version is zero for symbols with global visibility. // Symbols with only file visibility (such as file-level static // declarations in C) have a non-zero version distinguising // a symbol in one file from a symbol of the same name // in another file Version int } func (s SymID) String() string { if s.Version == 0 { return s.Name } return fmt.Sprintf("%s<%d>", s.Name, s.Version) } // A Data is a reference to data stored in an object file. // It records the offset and size of the data, so that a client can // read the data only if necessary. type Data struct { Offset int64 Size int64 } // A Reloc describes a relocation applied to a memory image to refer // to an address within a particular symbol. type Reloc struct { // The bytes at [Offset, Offset+Size) within the memory image // should be updated to refer to the address Add bytes after the start // of the symbol Sym. Offset int Size int Sym SymID Add int // The Type records the form of address expected in the bytes // described by the previous fields: absolute, PC-relative, and so on. // TODO(rsc): The interpretation of Type is not exposed by this package. Type int } // A Var describes a variable in a function stack frame: a declared // local variable, an input argument, or an output result. type Var struct { // The combination of Name, Kind, and Offset uniquely // identifies a variable in a function stack frame. // Using fewer of these - in particular, using only Name - does not. Name string // Name of variable. Kind int // TODO(rsc): Define meaning. Offset int // Frame offset. TODO(rsc): Define meaning. Type SymID // Go type for variable. } // Func contains additional per-symbol information specific to functions. type Func struct { Args int // size in bytes of of argument frame: inputs and outputs Frame int // size in bytes of local variable frame Var []Var // detail about local variables PCSP Data // PC → SP offset map PCFile Data // PC → file number map (index into File) PCLine Data // PC → line number map PCData []Data // PC → runtime support data map FuncData []FuncData // non-PC-specific runtime support data File []string // paths indexed by PCFile } // TODO: Add PCData []byte and PCDataIter (similar to liblink). // A FuncData is a single function-specific data value. type FuncData struct { Sym SymID // symbol holding data Offset int64 // offset into symbol for funcdata pointer } // A Package is a parsed Go object file or archive defining a Go package. type Package struct { ImportPath string // import path denoting this package Imports []string // packages imported by this package Syms []*Sym // symbols defined by this package MaxVersion int // maximum Version in any SymID in Syms } var ( archiveHeader = []byte("!\n") archiveMagic = []byte("`\n") goobjHeader = []byte("go objec") // truncated to size of archiveHeader errCorruptArchive = errors.New("corrupt archive") errTruncatedArchive = errors.New("truncated archive") errNotArchive = errors.New("unrecognized archive format") errCorruptObject = errors.New("corrupt object file") errTruncatedObject = errors.New("truncated object file") errNotObject = errors.New("unrecognized object file format") ) // An objReader is an object file reader. type objReader struct { p *Package b *bufio.Reader f io.ReadSeeker err error offset int64 limit int64 tmp [256]byte pkg string pkgprefix string } // importPathToPrefix returns the prefix that will be used in the // final symbol table for the given import path. // We escape '%', '"', all control characters and non-ASCII bytes, // and any '.' after the final slash. // // See ../../../cmd/ld/lib.c:/^pathtoprefix and // ../../../cmd/gc/subr.c:/^pathtoprefix. func importPathToPrefix(s string) string { // find index of last slash, if any, or else -1. // used for determining whether an index is after the last slash. slash := strings.LastIndex(s, "/") // check for chars that need escaping n := 0 for r := 0; r < len(s); r++ { if c := s[r]; c <= ' ' || (c == '.' && r > slash) || c == '%' || c == '"' || c >= 0x7F { n++ } } // quick exit if n == 0 { return s } // escape const hex = "0123456789abcdef" p := make([]byte, 0, len(s)+2*n) for r := 0; r < len(s); r++ { if c := s[r]; c <= ' ' || (c == '.' && r > slash) || c == '%' || c == '"' || c >= 0x7F { p = append(p, '%', hex[c>>4], hex[c&0xF]) } else { p = append(p, c) } } return string(p) } // init initializes r to read package p from f. func (r *objReader) init(f io.ReadSeeker, p *Package) { r.f = f r.p = p r.offset, _ = f.Seek(0, 1) r.limit, _ = f.Seek(0, 2) f.Seek(r.offset, 0) r.b = bufio.NewReader(f) r.pkgprefix = importPathToPrefix(p.ImportPath) + "." } // error records that an error occurred. // It returns only the first error, so that an error // caused by an earlier error does not discard information // about the earlier error. func (r *objReader) error(err error) error { if r.err == nil { if err == io.EOF { err = io.ErrUnexpectedEOF } r.err = err } // panic("corrupt") // useful for debugging return r.err } // readByte reads and returns a byte from the input file. // On I/O error or EOF, it records the error but returns byte 0. // A sequence of 0 bytes will eventually terminate any // parsing state in the object file. In particular, it ends the // reading of a varint. func (r *objReader) readByte() byte { if r.err != nil { return 0 } if r.offset >= r.limit { r.error(io.ErrUnexpectedEOF) return 0 } b, err := r.b.ReadByte() if err != nil { if err == io.EOF { err = io.ErrUnexpectedEOF } r.error(err) b = 0 } else { r.offset++ } return b } // read reads exactly len(b) bytes from the input file. // If an error occurs, read returns the error but also // records it, so it is safe for callers to ignore the result // as long as delaying the report is not a problem. func (r *objReader) readFull(b []byte) error { if r.err != nil { return r.err } if r.offset+int64(len(b)) > r.limit { return r.error(io.ErrUnexpectedEOF) } n, err := io.ReadFull(r.b, b) r.offset += int64(n) if err != nil { return r.error(err) } return nil } // readInt reads a zigzag varint from the input file. func (r *objReader) readInt() int { var u uint64 for shift := uint(0); ; shift += 7 { if shift >= 64 { r.error(errCorruptObject) return 0 } c := r.readByte() u |= uint64(c&0x7F) << shift if c&0x80 == 0 { break } } v := int64(u>>1) ^ (int64(u) << 63 >> 63) if int64(int(v)) != v { r.error(errCorruptObject) // TODO return 0 } return int(v) } // readString reads a length-delimited string from the input file. func (r *objReader) readString() string { n := r.readInt() buf := make([]byte, n) r.readFull(buf) return string(buf) } // readSymID reads a SymID from the input file. func (r *objReader) readSymID() SymID { name, vers := r.readString(), r.readInt() // In a symbol name in an object file, "". denotes the // prefix for the package in which the object file has been found. // Expand it. name = strings.Replace(name, `"".`, r.pkgprefix, -1) // An individual object file only records version 0 (extern) or 1 (static). // To make static symbols unique across all files being read, we // replace version 1 with the version corresponding to the current // file number. The number is incremented on each call to parseObject. if vers != 0 { vers = r.p.MaxVersion } return SymID{name, vers} } // readData reads a data reference from the input file. func (r *objReader) readData() Data { n := r.readInt() d := Data{Offset: r.offset, Size: int64(n)} r.skip(int64(n)) return d } // skip skips n bytes in the input. func (r *objReader) skip(n int64) { if n < 0 { r.error(fmt.Errorf("debug/goobj: internal error: misuse of skip")) } if n < int64(len(r.tmp)) { // Since the data is so small, a just reading from the buffered // reader is better than flushing the buffer and seeking. r.readFull(r.tmp[:n]) } else if n <= int64(r.b.Buffered()) { // Even though the data is not small, it has already been read. // Advance the buffer instead of seeking. for n > int64(len(r.tmp)) { r.readFull(r.tmp[:]) n -= int64(len(r.tmp)) } r.readFull(r.tmp[:n]) } else { // Seek, giving up buffered data. _, err := r.f.Seek(r.offset+n, 0) if err != nil { r.error(err) } r.offset += n r.b.Reset(r.f) } } // Parse parses an object file or archive from r, // assuming that its import path is pkgpath. func Parse(r io.ReadSeeker, pkgpath string) (*Package, error) { if pkgpath == "" { pkgpath = `""` } p := new(Package) p.ImportPath = pkgpath var rd objReader rd.init(r, p) err := rd.readFull(rd.tmp[:8]) if err != nil { if err == io.EOF { err = io.ErrUnexpectedEOF } return nil, err } switch { default: return nil, errNotObject case bytes.Equal(rd.tmp[:8], archiveHeader): if err := rd.parseArchive(); err != nil { return nil, err } case bytes.Equal(rd.tmp[:8], goobjHeader): if err := rd.parseObject(goobjHeader); err != nil { return nil, err } } return p, nil } // trimSpace removes trailing spaces from b and returns the corresponding string. // This effectively parses the form used in archive headers. func trimSpace(b []byte) string { return string(bytes.TrimRight(b, " ")) } // parseArchive parses a Unix archive of Go object files. // TODO(rsc): Need to skip non-Go object files. // TODO(rsc): Maybe record table of contents in r.p so that // linker can avoid having code to parse archives too. func (r *objReader) parseArchive() error { for r.offset < r.limit { if err := r.readFull(r.tmp[:60]); err != nil { return err } data := r.tmp[:60] // Each file is preceded by this text header (slice indices in first column): // 0:16 name // 16:28 date // 28:34 uid // 34:40 gid // 40:48 mode // 48:58 size // 58:60 magic - `\n // We only care about name, size, and magic. // The fields are space-padded on the right. // The size is in decimal. // The file data - size bytes - follows the header. // Headers are 2-byte aligned, so if size is odd, an extra padding // byte sits between the file data and the next header. // The file data that follows is padded to an even number of bytes: // if size is odd, an extra padding byte is inserted betw the next header. if len(data) < 60 { return errTruncatedArchive } if !bytes.Equal(data[58:60], archiveMagic) { return errCorruptArchive } name := trimSpace(data[0:16]) size, err := strconv.ParseInt(trimSpace(data[48:58]), 10, 64) if err != nil { return errCorruptArchive } data = data[60:] fsize := size + size&1 if fsize < 0 || fsize < size { return errCorruptArchive } switch name { case "__.SYMDEF", "__.GOSYMDEF", "__.PKGDEF": r.skip(size) default: oldLimit := r.limit r.limit = r.offset + size if err := r.parseObject(nil); err != nil { return fmt.Errorf("parsing archive member %q: %v", name, err) } r.skip(r.limit - r.offset) r.limit = oldLimit } if size&1 != 0 { r.skip(1) } } return nil } // parseObject parses a single Go object file. // The prefix is the bytes already read from the file, // typically in order to detect that this is an object file. // The object file consists of a textual header ending in "\n!\n" // and then the part we want to parse begins. // The format of that part is defined in a comment at the top // of src/liblink/objfile.c. func (r *objReader) parseObject(prefix []byte) error { // TODO(rsc): Maybe use prefix and the initial input to // record the header line from the file, which would // give the architecture and other version information. r.p.MaxVersion++ var c1, c2, c3 byte for { c1, c2, c3 = c2, c3, r.readByte() if c3 == 0 { // NUL or EOF, either is bad return errCorruptObject } if c1 == '\n' && c2 == '!' && c3 == '\n' { break } } r.readFull(r.tmp[:8]) if !bytes.Equal(r.tmp[:8], []byte("\x00\x00go13ld")) { return r.error(errCorruptObject) } // Direct package dependencies. for { s := r.readString() if s == "" { break } r.p.Imports = append(r.p.Imports, s) } // Symbols. for { if b := r.readByte(); b != 0xfe { if b != 0xff { return r.error(errCorruptObject) } break } typ := r.readInt() s := &Sym{SymID: r.readSymID()} r.p.Syms = append(r.p.Syms, s) s.Kind = SymKind(typ) s.DupOK = r.readInt() != 0 s.Size = r.readInt() s.Type = r.readSymID() s.Data = r.readData() s.Reloc = make([]Reloc, r.readInt()) for i := range s.Reloc { rel := &s.Reloc[i] rel.Offset = r.readInt() rel.Size = r.readInt() rel.Type = r.readInt() rel.Add = r.readInt() r.readInt() // Xadd - ignored rel.Sym = r.readSymID() r.readSymID() // Xsym - ignored } if s.Kind == STEXT { f := new(Func) s.Func = f f.Args = r.readInt() f.Frame = r.readInt() f.Var = make([]Var, r.readInt()) for i := range f.Var { v := &f.Var[i] v.Name = r.readSymID().Name v.Offset = r.readInt() v.Kind = r.readInt() v.Type = r.readSymID() } f.PCSP = r.readData() f.PCFile = r.readData() f.PCLine = r.readData() f.PCData = make([]Data, r.readInt()) for i := range f.PCData { f.PCData[i] = r.readData() } f.FuncData = make([]FuncData, r.readInt()) for i := range f.FuncData { f.FuncData[i].Sym = r.readSymID() } for i := range f.FuncData { f.FuncData[i].Offset = int64(r.readInt()) // TODO } f.File = make([]string, r.readInt()) for i := range f.File { f.File[i] = r.readSymID().Name } } } r.readFull(r.tmp[:7]) if !bytes.Equal(r.tmp[:7], []byte("\xffgo13ld")) { return r.error(errCorruptObject) } return nil }