// Copyright (c) 2012 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "content/browser/zygote_host/zygote_host_impl_linux.h"

// NOTE(review): the names of the five system headers below were lost when the
// angle-bracketed text was stripped from this file. They are restored from the
// POSIX calls this file makes (memcmp, socketpair, read/close/access,
// ssize_t/pid_t) -- confirm against upstream.
#include <string.h>
#include <sys/socket.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <unistd.h>

#include <vector>

#include "base/base_switches.h"
#include "base/command_line.h"
#include "base/environment.h"
#include "base/files/file_enumerator.h"
#include "base/files/file_util.h"
#include "base/files/scoped_file.h"
#include "base/linux_util.h"
#include "base/logging.h"
#include "base/memory/linked_ptr.h"
#include "base/memory/scoped_ptr.h"
#include "base/memory/scoped_vector.h"
#include "base/metrics/histogram.h"
#include "base/metrics/sparse_histogram.h"
#include "base/path_service.h"
#include "base/posix/eintr_wrapper.h"
#include "base/posix/unix_domain_socket_linux.h"
#include "base/process/launch.h"
#include "base/process/memory.h"
#include "base/process/process_handle.h"
#include "base/strings/string_number_conversions.h"
#include "base/strings/string_util.h"
#include "base/strings/utf_string_conversions.h"
#include "base/time/time.h"
#include "content/browser/renderer_host/render_sandbox_host_linux.h"
#include "content/common/child_process_sandbox_support_impl_linux.h"
#include "content/common/zygote_commands_linux.h"
#include "content/public/browser/content_browser_client.h"
#include "content/public/common/content_switches.h"
#include "content/public/common/result_codes.h"
#include "sandbox/linux/services/credentials.h"
#include "sandbox/linux/services/namespace_sandbox.h"
#include "sandbox/linux/services/namespace_utils.h"
#include "sandbox/linux/suid/client/setuid_sandbox_host.h"
#include "sandbox/linux/suid/common/sandbox.h"
#include "ui/base/ui_base_switches.h"
#include "ui/gfx/switches.h"

#if defined(USE_TCMALLOC)
#include "third_party/tcmalloc/chromium/src/gperftools/heap-profiler.h"
#endif

namespace content {

namespace {

// Receive a fixed message on fd and return the sender's PID.
// Returns true if the message received matches the expected message. bool ReceiveFixedMessage(int fd, const char* expect_msg, size_t expect_len, base::ProcessId* sender_pid) { char buf[expect_len + 1]; ScopedVector fds_vec; const ssize_t len = UnixDomainSocket::RecvMsgWithPid( fd, buf, sizeof(buf), &fds_vec, sender_pid); if (static_cast(len) != expect_len) return false; if (memcmp(buf, expect_msg, expect_len) != 0) return false; if (!fds_vec.empty()) return false; return true; } } // namespace // static ZygoteHost* ZygoteHost::GetInstance() { return ZygoteHostImpl::GetInstance(); } ZygoteHostImpl::ZygoteHostImpl() : control_fd_(-1), control_lock_(), pid_(-1), init_(false), use_suid_sandbox_for_adj_oom_score_(false), sandbox_binary_(), have_read_sandbox_status_word_(false), sandbox_status_(0), child_tracking_lock_(), list_of_running_zygote_children_(), should_teardown_after_last_child_exits_(false) {} ZygoteHostImpl::~ZygoteHostImpl() { TearDown(); } // static ZygoteHostImpl* ZygoteHostImpl::GetInstance() { return Singleton::get(); } void ZygoteHostImpl::Init(const std::string& sandbox_cmd) { DCHECK(!init_); init_ = true; base::FilePath chrome_path; CHECK(PathService::Get(base::FILE_EXE, &chrome_path)); base::CommandLine cmd_line(chrome_path); cmd_line.AppendSwitchASCII(switches::kProcessType, switches::kZygoteProcess); int fds[2]; CHECK(socketpair(AF_UNIX, SOCK_SEQPACKET, 0, fds) == 0); CHECK(UnixDomainSocket::EnableReceiveProcessId(fds[0])); base::FileHandleMappingVector fds_to_map; fds_to_map.push_back(std::make_pair(fds[1], kZygoteSocketPairFd)); base::LaunchOptions options; const base::CommandLine& browser_command_line = *base::CommandLine::ForCurrentProcess(); if (browser_command_line.HasSwitch(switches::kZygoteCmdPrefix)) { cmd_line.PrependWrapper( browser_command_line.GetSwitchValueNative(switches::kZygoteCmdPrefix)); } // Append any switches from the browser process that need to be forwarded on // to the zygote/renderers. 
// Should this list be obtained from browser_render_process_host.cc? static const char* kForwardSwitches[] = { switches::kAllowSandboxDebugging, switches::kDisableSeccompFilterSandbox, switches::kEnableLogging, // Support, e.g., --enable-logging=stderr. // Zygote process needs to know what resources to have loaded when it // becomes a renderer process. switches::kForceDeviceScaleFactor, switches::kLoggingLevel, switches::kNoSandbox, switches::kPpapiInProcess, switches::kRegisterPepperPlugins, switches::kV, switches::kVModule, }; cmd_line.CopySwitchesFrom(browser_command_line, kForwardSwitches, arraysize(kForwardSwitches)); GetContentClient()->browser()->AppendExtraCommandLineSwitches(&cmd_line, -1); sandbox_binary_ = sandbox_cmd.c_str(); const bool using_namespace_sandbox = ShouldUseNamespaceSandbox(); // A non empty sandbox_cmd means we want a SUID sandbox. const bool using_suid_sandbox = !sandbox_cmd.empty() && !using_namespace_sandbox; // Use the SUID sandbox for adjusting OOM scores when we are using the setuid // or namespace sandbox. This is needed beacuse the processes are // non-dumpable, so /proc/pid/oom_score_adj can only be written by root. use_suid_sandbox_for_adj_oom_score_ = using_namespace_sandbox || using_suid_sandbox; // Start up the sandbox host process and get the file descriptor for the // renderers to talk to it. const int sfd = RenderSandboxHostLinux::GetInstance()->GetRendererSocket(); fds_to_map.push_back(std::make_pair(sfd, GetSandboxFD())); base::ScopedFD dummy_fd; if (using_suid_sandbox) { scoped_ptr sandbox_host( sandbox::SetuidSandboxHost::Create()); sandbox_host->PrependWrapper(&cmd_line); sandbox_host->SetupLaunchOptions(&options, &fds_to_map, &dummy_fd); sandbox_host->SetupLaunchEnvironment(); } options.fds_to_remap = &fds_to_map; base::Process process = using_namespace_sandbox ? 
sandbox::NamespaceSandbox::LaunchProcess(cmd_line, options) : base::LaunchProcess(cmd_line, options); CHECK(process.IsValid()) << "Failed to launch zygote process"; dummy_fd.reset(); if (using_suid_sandbox || using_namespace_sandbox) { // The SUID sandbox will execute the zygote in a new PID namespace, and // the main zygote process will then fork from there. Watch now our // elaborate dance to find and validate the zygote's PID. // First we receive a message from the zygote boot process. base::ProcessId boot_pid; CHECK(ReceiveFixedMessage( fds[0], kZygoteBootMessage, sizeof(kZygoteBootMessage), &boot_pid)); // Within the PID namespace, the zygote boot process thinks it's PID 1, // but its real PID can never be 1. This gives us a reliable test that // the kernel is translating the sender's PID to our namespace. CHECK_GT(boot_pid, 1) << "Received invalid process ID for zygote; kernel might be too old? " "See crbug.com/357670 or try using --" << switches::kDisableSetuidSandbox << " to workaround."; // Now receive the message that the zygote's ready to go, along with the // main zygote process's ID. CHECK(ReceiveFixedMessage( fds[0], kZygoteHelloMessage, sizeof(kZygoteHelloMessage), &pid_)); CHECK_GT(pid_, 1); if (process.Pid() != pid_) { // Reap the sandbox. base::EnsureProcessGetsReaped(process.Pid()); } } else { // Not using the SUID sandbox. // Note that ~base::Process() will reset the internal value, but there's no // real "handle" on POSIX so that is safe. pid_ = process.Pid(); } close(fds[1]); control_fd_ = fds[0]; Pickle pickle; pickle.WriteInt(kZygoteCommandGetSandboxStatus); if (!SendMessage(pickle, NULL)) LOG(FATAL) << "Cannot communicate with zygote"; // We don't wait for the reply. We'll read it in ReadReply. 
} void ZygoteHostImpl::TearDownAfterLastChild() { bool do_teardown = false; { base::AutoLock lock(child_tracking_lock_); should_teardown_after_last_child_exits_ = true; do_teardown = list_of_running_zygote_children_.empty(); } if (do_teardown) { TearDown(); } } // Note: this is also called from the destructor. void ZygoteHostImpl::TearDown() { base::AutoLock lock(control_lock_); if (control_fd_ > -1) { // Closing the IPC channel will act as a notification to exit // to the Zygote. if (IGNORE_EINTR(close(control_fd_))) { PLOG(ERROR) << "Could not close Zygote control channel."; NOTREACHED(); } control_fd_ = -1; } } void ZygoteHostImpl::ZygoteChildBorn(pid_t process) { base::AutoLock lock(child_tracking_lock_); bool new_element_inserted = list_of_running_zygote_children_.insert(process).second; DCHECK(new_element_inserted); } void ZygoteHostImpl::ZygoteChildDied(pid_t process) { bool do_teardown = false; { base::AutoLock lock(child_tracking_lock_); size_t num_erased = list_of_running_zygote_children_.erase(process); DCHECK_EQ(1U, num_erased); do_teardown = should_teardown_after_last_child_exits_ && list_of_running_zygote_children_.empty(); } if (do_teardown) { TearDown(); } } bool ZygoteHostImpl::SendMessage(const Pickle& data, const std::vector* fds) { DCHECK_NE(-1, control_fd_); CHECK(data.size() <= kZygoteMaxMessageLength) << "Trying to send too-large message to zygote (sending " << data.size() << " bytes, max is " << kZygoteMaxMessageLength << ")"; CHECK(!fds || fds->size() <= UnixDomainSocket::kMaxFileDescriptors) << "Trying to send message with too many file descriptors to zygote " << "(sending " << fds->size() << ", max is " << UnixDomainSocket::kMaxFileDescriptors << ")"; return UnixDomainSocket::SendMsg(control_fd_, data.data(), data.size(), fds ? 
*fds : std::vector()); } ssize_t ZygoteHostImpl::ReadReply(void* buf, size_t buf_len) { DCHECK_NE(-1, control_fd_); // At startup we send a kZygoteCommandGetSandboxStatus request to the zygote, // but don't wait for the reply. Thus, the first time that we read from the // zygote, we get the reply to that request. if (!have_read_sandbox_status_word_) { if (HANDLE_EINTR(read(control_fd_, &sandbox_status_, sizeof(sandbox_status_))) != sizeof(sandbox_status_)) { return -1; } have_read_sandbox_status_word_ = true; UMA_HISTOGRAM_SPARSE_SLOWLY("Linux.SandboxStatus", sandbox_status_); } return HANDLE_EINTR(read(control_fd_, buf, buf_len)); } pid_t ZygoteHostImpl::ForkRequest(const std::vector& argv, scoped_ptr mapping, const std::string& process_type) { DCHECK(init_); Pickle pickle; int raw_socks[2]; PCHECK(0 == socketpair(AF_UNIX, SOCK_SEQPACKET, 0, raw_socks)); base::ScopedFD my_sock(raw_socks[0]); base::ScopedFD peer_sock(raw_socks[1]); CHECK(UnixDomainSocket::EnableReceiveProcessId(my_sock.get())); pickle.WriteInt(kZygoteCommandFork); pickle.WriteString(process_type); pickle.WriteInt(argv.size()); for (std::vector::const_iterator i = argv.begin(); i != argv.end(); ++i) pickle.WriteString(*i); // Fork requests contain one file descriptor for the PID oracle, and one // more for each file descriptor mapping for the child process. const size_t num_fds_to_send = 1 + mapping->GetMappingSize(); pickle.WriteInt(num_fds_to_send); std::vector fds; // First FD to send is peer_sock. // TODO(morrita): Ideally, this should be part of the mapping so that // FileDescriptorInfo can manages its lifetime. fds.push_back(peer_sock.get()); // The rest come from mapping. for (size_t i = 0; i < mapping->GetMappingSize(); ++i) { pickle.WriteUInt32(mapping->GetIDAt(i)); fds.push_back(mapping->GetFDAt(i)); } // Sanity check that we've populated |fds| correctly. 
DCHECK_EQ(num_fds_to_send, fds.size()); pid_t pid; { base::AutoLock lock(control_lock_); if (!SendMessage(pickle, &fds)) return base::kNullProcessHandle; mapping.reset(); peer_sock.reset(); { char buf[sizeof(kZygoteChildPingMessage) + 1]; ScopedVector recv_fds; base::ProcessId real_pid; ssize_t n = UnixDomainSocket::RecvMsgWithPid( my_sock.get(), buf, sizeof(buf), &recv_fds, &real_pid); if (n != sizeof(kZygoteChildPingMessage) || 0 != memcmp(buf, kZygoteChildPingMessage, sizeof(kZygoteChildPingMessage))) { // Zygote children should still be trustworthy when they're supposed to // ping us, so something's broken if we don't receive a valid ping. LOG(ERROR) << "Did not receive ping from zygote child"; NOTREACHED(); real_pid = -1; } my_sock.reset(); // Always send PID back to zygote. Pickle pid_pickle; pid_pickle.WriteInt(kZygoteCommandForkRealPID); pid_pickle.WriteInt(real_pid); if (!SendMessage(pid_pickle, NULL)) return base::kNullProcessHandle; } // Read the reply, which pickles the PID and an optional UMA enumeration. static const unsigned kMaxReplyLength = 2048; char buf[kMaxReplyLength]; const ssize_t len = ReadReply(buf, sizeof(buf)); Pickle reply_pickle(buf, len); PickleIterator iter(reply_pickle); if (len <= 0 || !iter.ReadInt(&pid)) return base::kNullProcessHandle; // If there is a nonempty UMA name string, then there is a UMA // enumeration to record. std::string uma_name; int uma_sample; int uma_boundary_value; if (iter.ReadString(&uma_name) && !uma_name.empty() && iter.ReadInt(&uma_sample) && iter.ReadInt(&uma_boundary_value)) { // We cannot use the UMA_HISTOGRAM_ENUMERATION macro here, // because that's only for when the name is the same every time. // Here we're using whatever name we got from the other side. // But since it's likely that the same one will be used repeatedly // (even though it's not guaranteed), we cache it here. 
static base::HistogramBase* uma_histogram; if (!uma_histogram || uma_histogram->histogram_name() != uma_name) { uma_histogram = base::LinearHistogram::FactoryGet( uma_name, 1, uma_boundary_value, uma_boundary_value + 1, base::HistogramBase::kUmaTargetedHistogramFlag); } uma_histogram->Add(uma_sample); } if (pid <= 0) return base::kNullProcessHandle; } #if !defined(OS_OPENBSD) // This is just a starting score for a renderer or extension (the // only types of processes that will be started this way). It will // get adjusted as time goes on. (This is the same value as // chrome::kLowestRendererOomScore in chrome/chrome_constants.h, but // that's not something we can include here.) const int kLowestRendererOomScore = 300; AdjustRendererOOMScore(pid, kLowestRendererOomScore); #endif ZygoteChildBorn(pid); return pid; } #if !defined(OS_OPENBSD) void ZygoteHostImpl::AdjustRendererOOMScore(base::ProcessHandle pid, int score) { // 1) You can't change the oom_score_adj of a non-dumpable process // (EPERM) unless you're root. Because of this, we can't set the // oom_adj from the browser process. // // 2) We can't set the oom_score_adj before entering the sandbox // because the zygote is in the sandbox and the zygote is as // critical as the browser process. Its oom_adj value shouldn't // be changed. // // 3) A non-dumpable process can't even change its own oom_score_adj // because it's root owned 0644. The sandboxed processes don't // even have /proc, but one could imagine passing in a descriptor // from outside. // // So, in the normal case, we use the SUID binary to change it for us. // However, Fedora (and other SELinux systems) don't like us touching other // process's oom_score_adj (or oom_adj) values // (https://bugzilla.redhat.com/show_bug.cgi?id=581256). // // The offical way to get the SELinux mode is selinux_getenforcemode, but I // don't want to add another library to the build as it's sure to cause // problems with other, non-SELinux distros. 
// // So we just check for files in /selinux. This isn't foolproof, but it's not // bad and it's easy. static bool selinux; static bool selinux_valid = false; if (!selinux_valid) { const base::FilePath kSelinuxPath("/selinux"); base::FileEnumerator en(kSelinuxPath, false, base::FileEnumerator::FILES); bool has_selinux_files = !en.Next().empty(); selinux = access(kSelinuxPath.value().c_str(), X_OK) == 0 && has_selinux_files; selinux_valid = true; } if (use_suid_sandbox_for_adj_oom_score_ && !selinux) { #if defined(USE_TCMALLOC) // If heap profiling is running, these processes are not exiting, at least // on ChromeOS. The easiest thing to do is not launch them when profiling. // TODO(stevenjb): Investigate further and fix. if (IsHeapProfilerRunning()) return; #endif std::vector adj_oom_score_cmdline; adj_oom_score_cmdline.push_back(sandbox_binary_); adj_oom_score_cmdline.push_back(sandbox::kAdjustOOMScoreSwitch); adj_oom_score_cmdline.push_back(base::Int64ToString(pid)); adj_oom_score_cmdline.push_back(base::IntToString(score)); base::Process sandbox_helper_process; base::LaunchOptions options; // sandbox_helper_process is a setuid binary. 
options.allow_new_privs = true; sandbox_helper_process = base::LaunchProcess(adj_oom_score_cmdline, options); if (sandbox_helper_process.IsValid()) base::EnsureProcessGetsReaped(sandbox_helper_process.Pid()); } else if (!use_suid_sandbox_for_adj_oom_score_) { if (!base::AdjustOOMScore(pid, score)) PLOG(ERROR) << "Failed to adjust OOM score of renderer with pid " << pid; } } #endif void ZygoteHostImpl::EnsureProcessTerminated(pid_t process) { DCHECK(init_); Pickle pickle; pickle.WriteInt(kZygoteCommandReap); pickle.WriteInt(process); if (!SendMessage(pickle, NULL)) LOG(ERROR) << "Failed to send Reap message to zygote"; ZygoteChildDied(process); } base::TerminationStatus ZygoteHostImpl::GetTerminationStatus( base::ProcessHandle handle, bool known_dead, int* exit_code) { DCHECK(init_); Pickle pickle; pickle.WriteInt(kZygoteCommandGetTerminationStatus); pickle.WriteBool(known_dead); pickle.WriteInt(handle); static const unsigned kMaxMessageLength = 128; char buf[kMaxMessageLength]; ssize_t len; { base::AutoLock lock(control_lock_); if (!SendMessage(pickle, NULL)) LOG(ERROR) << "Failed to send GetTerminationStatus message to zygote"; len = ReadReply(buf, sizeof(buf)); } // Set this now to handle the error cases. 
if (exit_code) *exit_code = RESULT_CODE_NORMAL_EXIT; int status = base::TERMINATION_STATUS_NORMAL_TERMINATION; if (len == -1) { LOG(WARNING) << "Error reading message from zygote: " << errno; } else if (len == 0) { LOG(WARNING) << "Socket closed prematurely."; } else { Pickle read_pickle(buf, len); int tmp_status, tmp_exit_code; PickleIterator iter(read_pickle); if (!iter.ReadInt(&tmp_status) || !iter.ReadInt(&tmp_exit_code)) { LOG(WARNING) << "Error parsing GetTerminationStatus response from zygote."; } else { if (exit_code) *exit_code = tmp_exit_code; status = tmp_status; } } if (status != base::TERMINATION_STATUS_STILL_RUNNING) { ZygoteChildDied(handle); } return static_cast(status); } pid_t ZygoteHostImpl::GetPid() const { return pid_; } int ZygoteHostImpl::GetSandboxStatus() const { if (have_read_sandbox_status_word_) return sandbox_status_; return 0; } bool ZygoteHostImpl::ShouldUseNamespaceSandbox() { const base::CommandLine& command_line = *base::CommandLine::ForCurrentProcess(); if (command_line.HasSwitch(switches::kNoSandbox)) { return false; } if (command_line.HasSwitch(switches::kDisableNamespaceSandbox)) { return false; } if (!sandbox::Credentials::CanCreateProcessInNewUserNS()) { return false; } return true; } } // namespace content