diff options
Diffstat (limited to 'src/VBox/NetworkServices')
52 files changed, 17843 insertions, 2720 deletions
diff --git a/src/VBox/NetworkServices/DHCP/ClientDataInt.h b/src/VBox/NetworkServices/DHCP/ClientDataInt.h new file mode 100644 index 00000000..3c544efb --- /dev/null +++ b/src/VBox/NetworkServices/DHCP/ClientDataInt.h @@ -0,0 +1,67 @@ +/* $Id: ClientDataInt.h $ */ +/** @file + * Config.h + */ + +/* + * Copyright (C) 2013 Oracle Corporation + * + * This file is part of VirtualBox Open Source Edition (OSE), as + * available from http://www.virtualbox.org. This file is free software; + * you can redistribute it and/or modify it under the terms of the GNU + * General Public License (GPL) as published by the Free Software + * Foundation, in version 2 as it comes in the "COPYING" file of the + * VirtualBox OSE distribution. VirtualBox OSE is distributed in the + * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind. + */ + +#ifndef __CLIENT_DATA_INT_H__ +#define __CLIENT_DATA_INT_H__ + +class ClientData +{ +public: + ClientData() + { + m_address.u = 0; + m_network.u = 0; + fHasLease = false; + fHasClient = false; + fBinding = true; + u64TimestampBindingStarted = 0; + u64TimestampLeasingStarted = 0; + u32LeaseExpirationPeriod = 0; + u32BindExpirationPeriod = 0; + pCfg = NULL; + + } + ~ClientData(){} + + /* client information */ + RTNETADDRIPV4 m_address; + RTNETADDRIPV4 m_network; + RTMAC m_mac; + + bool fHasClient; + + /* Lease part */ + bool fHasLease; + /** lease isn't commited */ + bool fBinding; + + /** Timestamp when lease commited. */ + uint64_t u64TimestampLeasingStarted; + /** Period when lease is expired in secs. */ + uint32_t u32LeaseExpirationPeriod; + + /** timestamp when lease was bound */ + uint64_t u64TimestampBindingStarted; + /* Period when binding is expired in secs. 
*/ + uint32_t u32BindExpirationPeriod; + + MapOptionId2RawOption options; + + NetworkConfigEntity *pCfg; +}; + +#endif diff --git a/src/VBox/NetworkServices/DHCP/Config.cpp b/src/VBox/NetworkServices/DHCP/Config.cpp new file mode 100644 index 00000000..2c76b5e2 --- /dev/null +++ b/src/VBox/NetworkServices/DHCP/Config.cpp @@ -0,0 +1,1305 @@ +/* $Id: Config.cpp $ */ +/** @file + * Configuration for DHCP. + */ + +/* + * Copyright (C) 2013-2014 Oracle Corporation + * + * This file is part of VirtualBox Open Source Edition (OSE), as + * available from http://www.virtualbox.org. This file is free software; + * you can redistribute it and/or modify it under the terms of the GNU + * General Public License (GPL) as published by the Free Software + * Foundation, in version 2 as it comes in the "COPYING" file of the + * VirtualBox OSE distribution. VirtualBox OSE is distributed in the + * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind. + */ + + +/** + * XXX: license. + */ + +#include <iprt/asm.h> +#include <iprt/getopt.h> +#include <iprt/net.h> +#include <iprt/time.h> + +#include <VBox/sup.h> +#include <VBox/intnet.h> +#include <VBox/intnetinline.h> +#include <VBox/vmm/vmm.h> +#include <VBox/version.h> + +#include <VBox/com/string.h> + +#include <iprt/cpp/xml.h> + +#define BASE_SERVICES_ONLY +#include "../NetLib/VBoxNetBaseService.h" +#include "../NetLib/VBoxNetLib.h" +#include "../NetLib/shared_ptr.h" + +#include <list> +#include <vector> +#include <map> +#include <string> + +#include "Config.h" +#include "ClientDataInt.h" + +bool operator== (const Lease& lhs, const Lease& rhs) +{ + return (lhs.m.get() == rhs.m.get()); +} + + +bool operator!= (const Lease& lhs, const Lease& rhs) +{ + return !(lhs == rhs); +} + + +/** + * Orders leases by address first and by issue timestamp second. + * + * Lease is the key type of MapLease2Ip4Address (a std::map), so the relation + * has to be a strict weak ordering. Comparing the two fields with a plain + * "||" is not one: two leases can then each be "less" than the other. + */ +bool operator< (const Lease& lhs, const Lease& rhs) +{ + const RTNETADDRIPV4 lhsAddress = lhs.getAddress(); + const RTNETADDRIPV4 rhsAddress = rhs.getAddress(); + + if (lhsAddress < rhsAddress) + return true; + if (rhsAddress < lhsAddress) + return false; + + /* Same address: the issue timestamp decides. */ + return (lhs.issued() < rhs.issued()); +} +/* consts */ + +const NullConfigEntity *g_NullConfig = new NullConfigEntity(); +RootConfigEntity
*g_RootConfig = new RootConfigEntity(std::string("ROOT"), 1200 /* 20 min. */); +const ClientMatchCriteria *g_AnyClient = new AnyClientMatchCriteria(); + +static ConfigurationManager *g_ConfigurationManager = ConfigurationManager::getConfigurationManager(); + +static NetworkManager *g_NetworkManager = NetworkManager::getNetworkManager(); + +bool MACClientMatchCriteria::check(const Client& client) const +{ + return (client == m_mac); +} + + +int BaseConfigEntity::match(Client& client, BaseConfigEntity **cfg) +{ + int iMatch = (m_criteria && m_criteria->check(client)? m_MatchLevel: 0); + if (m_children.empty()) + { + if (iMatch > 0) + { + *cfg = this; + return iMatch; + } + } + else + { + *cfg = this; + /* XXX: hack */ + BaseConfigEntity *matching = this; + int matchingLevel = m_MatchLevel; + + for (std::vector<BaseConfigEntity *>::iterator it = m_children.begin(); + it != m_children.end(); + ++it) + { + iMatch = (*it)->match(client, &matching); + if (iMatch > matchingLevel) + { + *cfg = matching; + matchingLevel = iMatch; + } + } + return matchingLevel; + } + return iMatch; +} + +/* Client */ +/* Configs + NetworkConfigEntity(std::string name, + ConfigEntity* pCfg, + ClientMatchCriteria* criteria, + RTNETADDRIPV4& networkID, + RTNETADDRIPV4& networkMask) +*/ +static const RTNETADDRIPV4 g_AnyIpv4 = {0}; +static const RTNETADDRIPV4 g_AllIpv4 = {0xffffffff}; +RootConfigEntity::RootConfigEntity(std::string name, uint32_t expPeriod): + NetworkConfigEntity(name, g_NullConfig, g_AnyClient, g_AnyIpv4, g_AllIpv4) +{ + m_MatchLevel = 2; + m_u32ExpirationPeriod = expPeriod; +} + +/* Configuration Manager */ +struct ConfigurationManager::Data +{ + Data():fFileExists(false){} + + MapLease2Ip4Address m_allocations; + Ipv4AddressContainer m_nameservers; + Ipv4AddressContainer m_routers; + + std::string m_domainName; + VecClient m_clients; + com::Utf8Str m_leaseStorageFilename; + bool fFileExists; +}; + +ConfigurationManager *ConfigurationManager::getConfigurationManager() +{ + if 
(!g_ConfigurationManager) + + + { + g_ConfigurationManager = new ConfigurationManager(); + g_ConfigurationManager->init(); + } + + return g_ConfigurationManager; +} + + +const std::string tagXMLLeases = "Leases"; +const std::string tagXMLLeasesAttributeVersion = "version"; +const std::string tagXMLLeasesVersion_1_0 = "1.0"; +const std::string tagXMLLease = "Lease"; +const std::string tagXMLLeaseAttributeMac = "mac"; +const std::string tagXMLLeaseAttributeNetwork = "network"; +const std::string tagXMLLeaseAddress = "Address"; +const std::string tagXMLAddressAttributeValue = "value"; +const std::string tagXMLLeaseTime = "Time"; +const std::string tagXMLTimeAttributeIssued = "issued"; +const std::string tagXMLTimeAttributeExpiration = "expiration"; +const std::string tagXMLLeaseOptions = "Options"; + +/** + * <Leases version="1.0"> + * <Lease mac="" network=""/> + * <Address value=""/> + * <Time issued="" expiration=""/> + * <options> + * <option name="" type=""/> + * </option> + * </options> + * </Lease> + * </Leases> + */ +int ConfigurationManager::loadFromFile(const com::Utf8Str& leaseStorageFileName) +{ + m->m_leaseStorageFilename = leaseStorageFileName; + + xml::XmlFileParser parser; + xml::Document doc; + + try { + parser.read(m->m_leaseStorageFilename.c_str(), doc); + } + catch (...) 
+ { + return VINF_SUCCESS; + } + + /* XML parsing */ + xml::ElementNode *root = doc.getRootElement(); + + if (!root || !root->nameEquals(tagXMLLeases.c_str())) + { + m->fFileExists = false; + return VERR_NOT_FOUND; + } + + com::Utf8Str version; + if (root) + root->getAttributeValue(tagXMLLeasesAttributeVersion.c_str(), version); + + /* XXX: version check */ + xml::NodesLoop leases(*root); + + bool valueExists; + const xml::ElementNode *lease; + while ((lease = leases.forAllNodes())) + { + if (!lease->nameEquals(tagXMLLease.c_str())) + continue; + + ClientData *data = new ClientData(); + Lease l(data); + if (l.fromXML(lease)) + { + + m->m_allocations.insert(MapLease2Ip4AddressPair(l, l.getAddress())); + + + NetworkConfigEntity *pNetCfg = NULL; + Client c(data); + int rc = g_RootConfig->match(c, (BaseConfigEntity **)&pNetCfg); + Assert(rc >= 0 && pNetCfg); + + l.setConfig(pNetCfg); + + m->m_clients.push_back(c); + } + } + + return VINF_SUCCESS; +} + + +int ConfigurationManager::saveToFile() +{ + if (m->m_leaseStorageFilename.isEmpty()) + return VINF_SUCCESS; + + xml::Document doc; + + xml::ElementNode *root = doc.createRootElement(tagXMLLeases.c_str()); + if (!root) + return VERR_INTERNAL_ERROR; + + root->setAttribute(tagXMLLeasesAttributeVersion.c_str(), tagXMLLeasesVersion_1_0.c_str()); + + for(MapLease2Ip4AddressConstIterator it = m->m_allocations.begin(); + it != m->m_allocations.end(); ++it) + { + xml::ElementNode *lease = root->createChild(tagXMLLease.c_str()); + if (!it->first.toXML(lease)) + { + /* XXX: todo logging + error handling */ + } + } + + try { + xml::XmlFileWriter writer(doc); + writer.write(m->m_leaseStorageFilename.c_str(), true); + } catch(...){} + + return VINF_SUCCESS; +} + + +int ConfigurationManager::extractRequestList(PCRTNETBOOTP pDhcpMsg, size_t cbDhcpMsg, RawOption& rawOpt) +{ + return ConfigurationManager::findOption(RTNET_DHCP_OPT_PARAM_REQ_LIST, pDhcpMsg, cbDhcpMsg, rawOpt); +} + + +Client 
ConfigurationManager::getClientByDhcpPacket(const RTNETBOOTP *pDhcpMsg, size_t cbDhcpMsg) +{ + + VecClientIterator it; + bool fDhcpValid = false; + uint8_t uMsgType = 0; + + fDhcpValid = RTNetIPv4IsDHCPValid(NULL, pDhcpMsg, cbDhcpMsg, &uMsgType); + AssertReturn(fDhcpValid, Client::NullClient); + + LogFlowFunc(("dhcp:mac:%RTmac\n", &pDhcpMsg->bp_chaddr.Mac)); + /* 1st. client IDs */ + for ( it = m->m_clients.begin(); + it != m->m_clients.end(); + ++it) + { + if ((*it) == pDhcpMsg->bp_chaddr.Mac) + { + LogFlowFunc(("client:mac:%RTmac\n", it->getMacAddress())); + /* check timestamp that request wasn't expired. */ + return (*it); + } + } + + if (it == m->m_clients.end()) + { + /* We hasn't got any session for this client */ + Client c; + c.initWithMac(pDhcpMsg->bp_chaddr.Mac); + m->m_clients.push_back(c); + return m->m_clients.back(); + } + + return Client::NullClient; +} + +/** + * Finds an option. + * + * @returns On success, a pointer to the first byte in the option data (no none + * then it'll be the byte following the 0 size field) and *pcbOpt set + * to the option length. + * On failure, NULL is returned and *pcbOpt unchanged. + * + * @param uOption The option to search for. + * @param pDhcpMsg The DHCP message. + * that this is adjusted if the option length is larger + * than the message buffer. + */ +int +ConfigurationManager::findOption(uint8_t uOption, PCRTNETBOOTP pDhcpMsg, size_t cbDhcpMsg, RawOption& opt) +{ + Assert(uOption != RTNET_DHCP_OPT_PAD); + + /* + * Validate the DHCP bits and figure the max size of the options in the vendor field. + */ + if (cbDhcpMsg <= RT_UOFFSETOF(RTNETBOOTP, bp_vend.Dhcp.dhcp_opts)) + return VERR_INVALID_PARAMETER; + + if (pDhcpMsg->bp_vend.Dhcp.dhcp_cookie != RT_H2N_U32_C(RTNET_DHCP_COOKIE)) + return VERR_INVALID_PARAMETER; + + size_t cbLeft = cbDhcpMsg - RT_UOFFSETOF(RTNETBOOTP, bp_vend.Dhcp.dhcp_opts); + if (cbLeft > RTNET_DHCP_OPT_SIZE) + cbLeft = RTNET_DHCP_OPT_SIZE; + + /* + * Search the vendor field. 
+ */ + bool fExtended = false; + uint8_t const *pb = &pDhcpMsg->bp_vend.Dhcp.dhcp_opts[0]; + while (pb && cbLeft > 0) + { + uint8_t uCur = *pb; + if (uCur == RTNET_DHCP_OPT_PAD) + { + cbLeft--; + pb++; + } + else if (cbLeft <= 1) + break; + else + { + size_t cbCur = pb[1]; + if (cbCur > cbLeft - 2) + cbCur = cbLeft - 2; + if (uCur == uOption) + { + opt.u8OptId = uCur; + memcpy(opt.au8RawOpt, pb+2, cbCur); + opt.cbRawOpt = cbCur; + return VINF_SUCCESS; + } + /* Skip the option id byte, the length byte and the cbCur payload bytes; + * the remaining-byte count must shrink by exactly what the pointer advances + * (cbCur was clamped above, so this cannot underflow). */ + pb += cbCur + 2; + cbLeft -= cbCur + 2; + } + } + + /** @todo search extended dhcp option field(s) when present */ + + return VERR_NOT_FOUND; +} + + +/** + * We bind lease for client till it continue with it on DHCPREQUEST. + */ +Lease ConfigurationManager::allocateLease4Client(const Client& client, PCRTNETBOOTP pDhcpMsg, size_t cbDhcpMsg) +{ + { + /** + * This mean that client has already bound or commited lease. + * If we've it happens it means that we received DHCPDISCOVER twice. + */ + const Lease l = client.lease(); + if (l != Lease::NullLease) + { + /* Here we should take lease from the m_allocation which was feed with leases + * on start + */ + if (l.isExpired()) + { + expireLease4Client(const_cast<Client&>(client)); + if (!l.isExpired()) + return l; + } + else + { + AssertReturn(l.getAddress().u != 0, Lease::NullLease); + return l; + } + } + } + + RTNETADDRIPV4 hintAddress; + RawOption opt; + NetworkConfigEntity *pNetCfg; + + Client cl(client); + AssertReturn(g_RootConfig->match(cl, (BaseConfigEntity **)&pNetCfg) > 0, Lease::NullLease); + + /* DHCPDISCOVER MAY contain request address */ + hintAddress.u = 0; + int rc = findOption(RTNET_DHCP_OPT_REQ_ADDR, pDhcpMsg, cbDhcpMsg, opt); + if (RT_SUCCESS(rc)) + { + hintAddress.u = *(uint32_t *)opt.au8RawOpt; + if ( RT_H2N_U32(hintAddress.u) < RT_H2N_U32(pNetCfg->lowerIp().u) + || RT_H2N_U32(hintAddress.u) > RT_H2N_U32(pNetCfg->upperIp().u)) + hintAddress.u = 0; /* clear hint */ + } + + if ( hintAddress.u + && !isAddressTaken(hintAddress)) + { + Lease l(cl); +
l.setConfig(pNetCfg); + l.setAddress(hintAddress); + m->m_allocations.insert(MapLease2Ip4AddressPair(l, hintAddress)); + return l; + } + + uint32_t u32 = 0; + for(u32 = RT_H2N_U32(pNetCfg->lowerIp().u); + u32 <= RT_H2N_U32(pNetCfg->upperIp().u); + ++u32) + { + RTNETADDRIPV4 address; + address.u = RT_H2N_U32(u32); + if (!isAddressTaken(address)) + { + Lease l(cl); + l.setConfig(pNetCfg); + l.setAddress(address); + m->m_allocations.insert(MapLease2Ip4AddressPair(l, address)); + return l; + } + } + + return Lease::NullLease; +} + + +int ConfigurationManager::commitLease4Client(Client& client) +{ + Lease l = client.lease(); + AssertReturn(l != Lease::NullLease, VERR_INTERNAL_ERROR); + + l.bindingPhase(false); + const NetworkConfigEntity *pCfg = l.getConfig(); + + AssertPtr(pCfg); + l.setExpiration(pCfg->expirationPeriod()); + l.phaseStart(RTTimeMilliTS()); + + saveToFile(); + + return VINF_SUCCESS; +} + + +int ConfigurationManager::expireLease4Client(Client& client) +{ + Lease l = client.lease(); + AssertReturn(l != Lease::NullLease, VERR_INTERNAL_ERROR); + + if (l.isInBindingPhase()) + { + + MapLease2Ip4AddressIterator it = m->m_allocations.find(l); + AssertReturn(it != m->m_allocations.end(), VERR_NOT_FOUND); + + /* + * XXX: perhaps it better to keep this allocation ???? 
+ */ + m->m_allocations.erase(it); + + l.expire(); + return VINF_SUCCESS; + } + + l = Lease(client); /* re-new */ + return VINF_SUCCESS; +} + + +bool ConfigurationManager::isAddressTaken(const RTNETADDRIPV4& addr, Lease& lease) +{ + MapLease2Ip4AddressIterator it; + + for (it = m->m_allocations.begin(); + it != m->m_allocations.end(); + ++it) + { + if (it->second.u == addr.u) + { + if (lease != Lease::NullLease) + lease = it->first; + + return true; + } + } + lease = Lease::NullLease; + return false; +} + + +bool ConfigurationManager::isAddressTaken(const RTNETADDRIPV4& addr) +{ + Lease ignore; + return isAddressTaken(addr, ignore); +} + + +NetworkConfigEntity *ConfigurationManager::addNetwork(NetworkConfigEntity *, + const RTNETADDRIPV4& networkId, + const RTNETADDRIPV4& netmask, + RTNETADDRIPV4& LowerAddress, + RTNETADDRIPV4& UpperAddress) +{ + static int id; + char name[64]; + + RTStrPrintf(name, RT_ELEMENTS(name), "network-%d", id); + std::string strname(name); + id++; + + + if (!LowerAddress.u) + LowerAddress = networkId; + + if (!UpperAddress.u) + UpperAddress.u = networkId.u | (~netmask.u); + + return new NetworkConfigEntity(strname, + g_RootConfig, + g_AnyClient, + 5, + networkId, + netmask, + LowerAddress, + UpperAddress); +} + +HostConfigEntity *ConfigurationManager::addHost(NetworkConfigEntity* pCfg, + const RTNETADDRIPV4& address, + ClientMatchCriteria *criteria) +{ + static int id; + char name[64]; + + RTStrPrintf(name, RT_ELEMENTS(name), "host-%d", id); + std::string strname(name); + id++; + + return new HostConfigEntity(address, strname, pCfg, criteria); +} + +int ConfigurationManager::addToAddressList(uint8_t u8OptId, RTNETADDRIPV4& address) +{ + switch(u8OptId) + { + case RTNET_DHCP_OPT_DNS: + m->m_nameservers.push_back(address); + break; + case RTNET_DHCP_OPT_ROUTERS: + m->m_routers.push_back(address); + break; + default: + Log(("dhcp-opt: list (%d) unsupported\n", u8OptId)); + } + return VINF_SUCCESS; +} + + +int 
ConfigurationManager::flushAddressList(uint8_t u8OptId) +{ + switch(u8OptId) + { + case RTNET_DHCP_OPT_DNS: + m->m_nameservers.clear(); + break; + case RTNET_DHCP_OPT_ROUTERS: + m->m_routers.clear(); + break; + default: + Log(("dhcp-opt: list (%d) unsupported\n", u8OptId)); + } + return VINF_SUCCESS; +} + + +const Ipv4AddressContainer& ConfigurationManager::getAddressList(uint8_t u8OptId) +{ + switch(u8OptId) + { + case RTNET_DHCP_OPT_DNS: + return m->m_nameservers; + + case RTNET_DHCP_OPT_ROUTERS: + return m->m_routers; + + } + /* XXX: Grrr !!! */ + return m_empty; +} + + +int ConfigurationManager::setString(uint8_t u8OptId, const std::string& str) +{ + switch (u8OptId) + { + case RTNET_DHCP_OPT_DOMAIN_NAME: + m->m_domainName = str; + break; + default: + break; + } + + return VINF_SUCCESS; +} + + +const std::string& ConfigurationManager::getString(uint8_t u8OptId) +{ + switch (u8OptId) + { + case RTNET_DHCP_OPT_DOMAIN_NAME: + if (m->m_domainName.length()) + return m->m_domainName; + else + return m_noString; + default: + break; + } + + return m_noString; +} + + +void ConfigurationManager::init() +{ + m = new ConfigurationManager::Data(); +} + + +ConfigurationManager::~ConfigurationManager() { if (m) delete m; } + +/** + * Network manager + */ +struct NetworkManager::Data +{ + Data() + { + RT_ZERO(BootPReplyMsg); + cbBooPReplyMsg = 0; + + m_OurAddress.u = 0; + m_OurNetmask.u = 0; + RT_ZERO(m_OurMac); + /* Only setService() assigns this pointer; start from NULL rather than an indeterminate value. */ + m_service = NULL; + } + + union { + RTNETBOOTP BootPHeader; + uint8_t au8Storage[1024]; + } BootPReplyMsg; + int cbBooPReplyMsg; + + RTNETADDRIPV4 m_OurAddress; + RTNETADDRIPV4 m_OurNetmask; + RTMAC m_OurMac; + const VBoxNetHlpUDPService *m_service; +}; + + +NetworkManager::NetworkManager():m(NULL) +{ + m = new NetworkManager::Data(); +} + + +NetworkManager::~NetworkManager() +{ + delete m; + m = NULL; +} + + +NetworkManager *NetworkManager::getNetworkManager() +{ + if (!g_NetworkManager) + g_NetworkManager = new NetworkManager(); + + return g_NetworkManager; +} + + +const
RTNETADDRIPV4& NetworkManager::getOurAddress() const +{ + return m->m_OurAddress; +} + + +const RTNETADDRIPV4& NetworkManager::getOurNetmask() const +{ + return m->m_OurNetmask; +} + + +const RTMAC& NetworkManager::getOurMac() const +{ + return m->m_OurMac; +} + + +void NetworkManager::setOurAddress(const RTNETADDRIPV4& aAddress) +{ + m->m_OurAddress = aAddress; +} + + +void NetworkManager::setOurNetmask(const RTNETADDRIPV4& aNetmask) +{ + m->m_OurNetmask = aNetmask; +} + + +void NetworkManager::setOurMac(const RTMAC& aMac) +{ + m->m_OurMac = aMac; +} + + +void NetworkManager::setService(const VBoxNetHlpUDPService *srv) +{ + m->m_service = srv; +} + +/** + * Network manager creates DHCPOFFER datagramm + */ +int NetworkManager::offer4Client(const Client& client, uint32_t u32Xid, + uint8_t *pu8ReqList, int cReqList) +{ + Lease l(client); /* XXX: oh, it looks badly, but now we have lease */ + prepareReplyPacket4Client(client, u32Xid); + + RTNETADDRIPV4 address = l.getAddress(); + m->BootPReplyMsg.BootPHeader.bp_yiaddr = address; + + /* Ubuntu ???*/ + m->BootPReplyMsg.BootPHeader.bp_ciaddr = address; + + /* options: + * - IP lease time + * - message type + * - server identifier + */ + RawOption opt; + RT_ZERO(opt); + + std::vector<RawOption> extra; + opt.u8OptId = RTNET_DHCP_OPT_MSG_TYPE; + opt.au8RawOpt[0] = RTNET_DHCP_MT_OFFER; + opt.cbRawOpt = 1; + extra.push_back(opt); + + opt.u8OptId = RTNET_DHCP_OPT_LEASE_TIME; + + const NetworkConfigEntity *pCfg = l.getConfig(); + AssertPtr(pCfg); + + *(uint32_t *)opt.au8RawOpt = RT_H2N_U32(pCfg->expirationPeriod()); + opt.cbRawOpt = sizeof(RTNETADDRIPV4); + + extra.push_back(opt); + + processParameterReqList(client, pu8ReqList, cReqList, extra); + + return doReply(client, extra); +} + +/** + * Network manager creates DHCPACK + */ +int NetworkManager::ack(const Client& client, uint32_t u32Xid, + uint8_t *pu8ReqList, int cReqList) +{ + RTNETADDRIPV4 address; + + prepareReplyPacket4Client(client, u32Xid); + + Lease l = 
client.lease(); + address = l.getAddress(); + m->BootPReplyMsg.BootPHeader.bp_ciaddr = address; + + + /* rfc2131 4.3.1 is about DHCPDISCOVER and this value is equal to ciaddr from + * DHCPREQUEST or 0 ... + * XXX: Using addressHint is not correct way to initialize [cy]iaddress... + */ + m->BootPReplyMsg.BootPHeader.bp_ciaddr = address; + m->BootPReplyMsg.BootPHeader.bp_yiaddr = address; + + Assert(m->BootPReplyMsg.BootPHeader.bp_yiaddr.u); + + /* options: + * - IP address lease time (if DHCPREQUEST) + * - message type + * - server identifier + */ + RawOption opt; + RT_ZERO(opt); + + std::vector<RawOption> extra; + opt.u8OptId = RTNET_DHCP_OPT_MSG_TYPE; + opt.au8RawOpt[0] = RTNET_DHCP_MT_ACK; + opt.cbRawOpt = 1; + extra.push_back(opt); + + /* + * XXX: lease time should be conditional. If on dhcprequest then tim should be provided, + * else on dhcpinform it mustn't. + */ + opt.u8OptId = RTNET_DHCP_OPT_LEASE_TIME; + *(uint32_t *)opt.au8RawOpt = RT_H2N_U32(l.getExpiration()); + opt.cbRawOpt = sizeof(RTNETADDRIPV4); + extra.push_back(opt); + + processParameterReqList(client, pu8ReqList, cReqList, extra); + + return doReply(client, extra); +} + +/** + * Network manager creates DHCPNAK + */ +int NetworkManager::nak(const Client& client, uint32_t u32Xid) +{ + + Lease l = client.lease(); + if (l == Lease::NullLease) + return VERR_INTERNAL_ERROR; + + prepareReplyPacket4Client(client, u32Xid); + + /* this field filed in prepareReplyPacket4Session, and + * RFC 2131 require to have it zero fo NAK. 
+ */ + m->BootPReplyMsg.BootPHeader.bp_yiaddr.u = 0; + + /* options: + * - message type (if DHCPREQUEST) + * - server identifier + */ + RawOption opt; + std::vector<RawOption> extra; + + opt.u8OptId = RTNET_DHCP_OPT_MSG_TYPE; + opt.au8RawOpt[0] = RTNET_DHCP_MT_NAC; + opt.cbRawOpt = 1; + extra.push_back(opt); + + return doReply(client, extra); +} + +/** + * + */ +int NetworkManager::prepareReplyPacket4Client(const Client& client, uint32_t u32Xid) +{ + RT_ZERO(m->BootPReplyMsg); + + m->BootPReplyMsg.BootPHeader.bp_op = RTNETBOOTP_OP_REPLY; + m->BootPReplyMsg.BootPHeader.bp_htype = RTNET_ARP_ETHER; + m->BootPReplyMsg.BootPHeader.bp_hlen = sizeof(RTMAC); + m->BootPReplyMsg.BootPHeader.bp_hops = 0; + m->BootPReplyMsg.BootPHeader.bp_xid = u32Xid; + m->BootPReplyMsg.BootPHeader.bp_secs = 0; + /* XXX: bp_flags should be processed specially */ + m->BootPReplyMsg.BootPHeader.bp_flags = 0; + m->BootPReplyMsg.BootPHeader.bp_ciaddr.u = 0; + m->BootPReplyMsg.BootPHeader.bp_giaddr.u = 0; + + m->BootPReplyMsg.BootPHeader.bp_chaddr.Mac = client.getMacAddress(); + + const Lease l = client.lease(); + m->BootPReplyMsg.BootPHeader.bp_yiaddr = l.getAddress(); + m->BootPReplyMsg.BootPHeader.bp_siaddr.u = 0; + + + m->BootPReplyMsg.BootPHeader.bp_vend.Dhcp.dhcp_cookie = RT_H2N_U32_C(RTNET_DHCP_COOKIE); + + memset(&m->BootPReplyMsg.BootPHeader.bp_vend.Dhcp.dhcp_opts[0], + '\0', + RTNET_DHCP_OPT_SIZE); + + return VINF_SUCCESS; +} + + +int NetworkManager::doReply(const Client& client, const std::vector<RawOption>& extra) +{ + int rc; + + /* + Options.... 
+ */ + VBoxNetDhcpWriteCursor Cursor(&m->BootPReplyMsg.BootPHeader, RTNET_DHCP_NORMAL_SIZE); + + /* The basics */ + + Cursor.optIPv4Addr(RTNET_DHCP_OPT_SERVER_ID, m->m_OurAddress); + + const Lease l = client.lease(); + const std::map<uint8_t, RawOption>& options = l.options(); + + for(std::vector<RawOption>::const_iterator it = extra.begin(); + it != extra.end(); ++it) + { + if (!Cursor.begin(it->u8OptId, it->cbRawOpt)) + break; + Cursor.put(it->au8RawOpt, it->cbRawOpt); + + } + + for(std::map<uint8_t, RawOption>::const_iterator it = options.begin(); + it != options.end(); ++it) + { + if (!Cursor.begin(it->second.u8OptId, it->second.cbRawOpt)) + break; + Cursor.put(it->second.au8RawOpt, it->second.cbRawOpt); + + } + + Cursor.optEnd(); + + /* + */ +#if 0 + /** @todo need to see someone set this flag to check that it's correct. */ + if (!(pDhcpMsg->bp_flags & RTNET_DHCP_FLAGS_NO_BROADCAST)) + { + rc = VBoxNetUDPUnicast(m_pSession, + m_hIf, + m_pIfBuf, + m_OurAddress, + &m_OurMac, + RTNETIPV4_PORT_BOOTPS, /* sender */ + IPv4AddrBrdCast, + &BootPReplyMsg.BootPHeader->bp_chaddr.Mac, + RTNETIPV4_PORT_BOOTPC, /* receiver */ + &BootPReplyMsg, cbBooPReplyMsg); + } + else +#endif + rc = m->m_service->hlpUDPBroadcast(RTNETIPV4_PORT_BOOTPS, /* sender */ + RTNETIPV4_PORT_BOOTPC, + &m->BootPReplyMsg, + RTNET_DHCP_NORMAL_SIZE); + + AssertRCReturn(rc,rc); + + return VINF_SUCCESS; +} + + +int NetworkManager::processParameterReqList(const Client& client, const uint8_t *pu8ReqList, + int cReqList, std::vector<RawOption>& extra) +{ + const Lease l = client.lease(); + + const NetworkConfigEntity *pNetCfg = l.getConfig(); + + /* request parameter list */ + RawOption opt; + bool fIgnore; + uint8_t u8Req; + for (int idxParam = 0; idxParam < cReqList; ++idxParam) + { + fIgnore = false; + RT_ZERO(opt); + u8Req = opt.u8OptId = pu8ReqList[idxParam]; + + switch(u8Req) + { + case RTNET_DHCP_OPT_SUBNET_MASK: + ((PRTNETADDRIPV4)opt.au8RawOpt)->u = pNetCfg->netmask().u; + opt.cbRawOpt = 
sizeof(RTNETADDRIPV4); + + break; + + case RTNET_DHCP_OPT_ROUTERS: + case RTNET_DHCP_OPT_DNS: + { + const Ipv4AddressContainer lst = + g_ConfigurationManager->getAddressList(u8Req); + PRTNETADDRIPV4 pAddresses = (PRTNETADDRIPV4)&opt.au8RawOpt[0]; + + for (Ipv4AddressConstIterator it = lst.begin(); + it != lst.end(); + ++it) + { + /* au8RawOpt is a fixed-size array and cbRawOpt a uint8_t: + * stop once another address would no longer fit. */ + if (opt.cbRawOpt + sizeof(RTNETADDRIPV4) > sizeof(opt.au8RawOpt)) + break; + + *pAddresses = (*it); + pAddresses++; + opt.cbRawOpt += sizeof(RTNETADDRIPV4); + } + + if (lst.empty()) + fIgnore = true; + } + break; + case RTNET_DHCP_OPT_DOMAIN_NAME: + { + std::string domainName = g_ConfigurationManager->getString(u8Req); + if (domainName == g_ConfigurationManager->m_noString) + { + fIgnore = true; + break; + } + + /* strcpy() below also writes the terminating NUL, so the name has to be + * shorter than the option buffer; skip the option rather than overflow it. */ + if (domainName.length() >= sizeof(opt.au8RawOpt)) + { + Log(("opt: %d is ignored, value too long\n", u8Req)); + fIgnore = true; + break; + } + + char *pszDomainName = (char *)&opt.au8RawOpt[0]; + + strcpy(pszDomainName, domainName.c_str()); + opt.cbRawOpt = domainName.length(); + } + break; + default: + Log(("opt: %d is ignored\n", u8Req)); + fIgnore = true; + break; + } + + if (!fIgnore) + extra.push_back(opt); + + } + + return VINF_SUCCESS; +} + +/* Client */ +Client::Client() +{ + m = SharedPtr<ClientData>(); +} + + +void Client::initWithMac(const RTMAC& mac) +{ + m = SharedPtr<ClientData>(new ClientData()); + m->m_mac = mac; +} + + +bool Client::operator== (const RTMAC& mac) const +{ + return (m.get() && m->m_mac == mac); +} + + +const RTMAC& Client::getMacAddress() const +{ + return m->m_mac; +} + + +Lease Client::lease() +{ + if (!m.get()) return Lease::NullLease; + + if (m->fHasLease) + return Lease(*this); + else + return Lease::NullLease; +} + + +const Lease Client::lease() const +{ + return const_cast<Client *>(this)->lease(); +} + + +Client::Client(ClientData *data):m(SharedPtr<ClientData>(data)){} + +/* Lease */ +Lease::Lease() +{ + m = SharedPtr<ClientData>(); +} + + +Lease::Lease (const Client& c) +{ + m = SharedPtr<ClientData>(c.m); + if ( !m->fHasLease + || ( isExpired() + && !isInBindingPhase())) + { + m->fHasLease = true; + m->fBinding = true; + phaseStart(RTTimeMilliTS()); + } +} + + +bool Lease::isExpired() const +{ +
AssertPtrReturn(m.get(), false); + + if (!m->fBinding) + return (ASMDivU64ByU32RetU32(RTTimeMilliTS() - m->u64TimestampLeasingStarted, 1000) + > m->u32LeaseExpirationPeriod); + else + return (ASMDivU64ByU32RetU32(RTTimeMilliTS() - m->u64TimestampBindingStarted, 1000) + > m->u32BindExpirationPeriod); +} + + +void Lease::expire() +{ + /* XXX: TODO */ +} + + +void Lease::phaseStart(uint64_t u64Start) +{ + if (m->fBinding) + m->u64TimestampBindingStarted = u64Start; + else + m->u64TimestampLeasingStarted = u64Start; +} + + +void Lease::bindingPhase(bool fOnOff) +{ + m->fBinding = fOnOff; +} + + +bool Lease::isInBindingPhase() const +{ + return m->fBinding; +} + + +uint64_t Lease::issued() const +{ + return m->u64TimestampLeasingStarted; +} + + +void Lease::setExpiration(uint32_t exp) +{ + if (m->fBinding) + m->u32BindExpirationPeriod = exp; + else + m->u32LeaseExpirationPeriod = exp; +} + + +uint32_t Lease::getExpiration() const +{ + if (m->fBinding) + return m->u32BindExpirationPeriod; + else + return m->u32LeaseExpirationPeriod; +} + + +RTNETADDRIPV4 Lease::getAddress() const +{ + return m->m_address; +} + + +void Lease::setAddress(RTNETADDRIPV4 address) +{ + m->m_address = address; +} + + +const NetworkConfigEntity *Lease::getConfig() const +{ + return m->pCfg; +} + + +void Lease::setConfig(NetworkConfigEntity *pCfg) +{ + m->pCfg = pCfg; +} + + +const MapOptionId2RawOption& Lease::options() const +{ + return m->options; +} + + +Lease::Lease(ClientData *pd):m(SharedPtr<ClientData>(pd)){} + + +bool Lease::toXML(xml::ElementNode *node) const +{ + bool valueAddition = node->setAttribute(tagXMLLeaseAttributeMac.c_str(), com::Utf8StrFmt("%RTmac", &m->m_mac)); + if (!valueAddition) return false; + + valueAddition = node->setAttribute(tagXMLLeaseAttributeNetwork.c_str(), com::Utf8StrFmt("%RTnaipv4", m->m_network)); + if (!valueAddition) return false; + + xml::ElementNode *address = node->createChild(tagXMLLeaseAddress.c_str()); + if (!address) return false; + + 
valueAddition = address->setAttribute(tagXMLAddressAttributeValue.c_str(), com::Utf8StrFmt("%RTnaipv4", m->m_address)); + if (!valueAddition) return false; + + xml::ElementNode *time = node->createChild(tagXMLLeaseTime.c_str()); + if (!time) return false; + + valueAddition = time->setAttribute(tagXMLTimeAttributeIssued.c_str(), + m->u64TimestampLeasingStarted); + if (!valueAddition) return false; + + valueAddition = time->setAttribute(tagXMLTimeAttributeExpiration.c_str(), + m->u32LeaseExpirationPeriod); + if (!valueAddition) return false; + + return true; +} + + +/** + * Fills the lease from a Lease XML element. + * + * Reads the "mac" and "network" attributes of the node, the "value" attribute + * of its Address child and the "issued"/"expiration" attributes of its Time + * child. On success the lease is marked as held and no longer in the binding + * phase. + * + * @returns true when every attribute was present and parsed, false otherwise + * (fields read before the failure stay modified). + * @param node The Lease element to read. + */ +bool Lease::fromXML(const xml::ElementNode *node) +{ + com::Utf8Str mac; + bool valueExists = node->getAttributeValue(tagXMLLeaseAttributeMac.c_str(), mac); + if (!valueExists) return false; + int rc = RTNetStrToMacAddr(mac.c_str(), &m->m_mac); + if (RT_FAILURE(rc)) return false; + + com::Utf8Str network; + valueExists = node->getAttributeValue(tagXMLLeaseAttributeNetwork.c_str(), network); + if (!valueExists) return false; + rc = RTNetStrToIPv4Addr(network.c_str(), &m->m_network); + if (RT_FAILURE(rc)) return false; + + /* Address */ + const xml::ElementNode *address = node->findChildElement(tagXMLLeaseAddress.c_str()); + if (!address) return false; + com::Utf8Str addressValue; + valueExists = address->getAttributeValue(tagXMLAddressAttributeValue.c_str(), addressValue); + if (!valueExists) return false; + rc = RTNetStrToIPv4Addr(addressValue.c_str(), &m->m_address); + /* A malformed address must reject the lease, same as the MAC and network above. */ + if (RT_FAILURE(rc)) return false; + + /* Time */ + const xml::ElementNode *time = node->findChildElement(tagXMLLeaseTime.c_str()); + if (!time) return false; + + valueExists = time->getAttributeValue(tagXMLTimeAttributeIssued.c_str(), + &m->u64TimestampLeasingStarted); + if (!valueExists) return false; + m->fBinding = false; + + valueExists = time->getAttributeValue(tagXMLTimeAttributeExpiration.c_str(), + &m->u32LeaseExpirationPeriod); + if (!valueExists) return false; + + m->fHasLease = true; + return true; +} + + +const Lease Lease::NullLease; + +const Client
Client::NullClient; diff --git a/src/VBox/NetworkServices/DHCP/Config.h b/src/VBox/NetworkServices/DHCP/Config.h new file mode 100644 index 00000000..7d20a72b --- /dev/null +++ b/src/VBox/NetworkServices/DHCP/Config.h @@ -0,0 +1,829 @@ +/* $Id: Config.h $ */ +/** + * This file contains declarations of DHCP config. + */ + +#ifndef _CONFIG_H_ +#define _CONFIG_H_ + +#include <iprt/asm-math.h> +#include <iprt/cpp/utils.h> +#include <VBox/com/string.h> + +#include "../NetLib/cpp/utils.h" + + +static bool operator > (const RTNETADDRIPV4& a, const RTNETADDRIPV4& b) +{ + return (b < a); +} + + +class RawOption +{ +public: + RawOption() + { + RT_ZERO(*this); + } + uint8_t u8OptId; + uint8_t cbRawOpt; + uint8_t au8RawOpt[255]; +}; + +class ClientData; +class Client; +class Lease; +class BaseConfigEntity; + +class NetworkConfigEntity; +class HostConfigEntity; +class ClientMatchCriteria; +class ConfigurationManager; + +/* + * it's a basic representation of + * of out undestanding what client is + * XXX: Client might sends Option 61 (RFC2132 9.14 "Client-identifier") signalling + * that we may identify it in special way + * + * XXX: Client might send Option 60 (RFC2132 9.13 "Vendor class undentifier") + * in response it's expected server sends Option 43 (RFC2132 8.4. 
"Vendor Specific Information") + */ +class Client +{ + friend class Lease; + friend class ConfigurationManager; + + public: + Client(); + void initWithMac(const RTMAC& mac); + bool operator== (const RTMAC& mac) const; + const RTMAC& getMacAddress() const; + + /** Dumps client query */ + void dump(); + + Lease lease(); + const Lease lease() const; + + public: + static const Client NullClient; + + private: + Client(ClientData *); + SharedPtr<ClientData> m; +}; + + +bool operator== (const Lease&, const Lease&); +bool operator!= (const Lease&, const Lease&); +bool operator< (const Lease&, const Lease&); + + +typedef std::map<uint8_t, RawOption> MapOptionId2RawOption; +typedef MapOptionId2RawOption::iterator MapOptionId2RawOptionIterator; +typedef MapOptionId2RawOption::const_iterator MapOptionId2RawOptionConstIterator; +typedef MapOptionId2RawOption::value_type MapOptionId2RawOptionValue; + +namespace xml { + class ElementNode; +} + +class Lease +{ + friend class Client; + friend bool operator== (const Lease&, const Lease&); + //friend int ConfigurationManager::loadFromFile(const std::string&); + friend class ConfigurationManager; + + public: + Lease(); + Lease(const Client&); + + bool isExpired() const; + void expire(); + + /* Depending on phase *Expiration and phaseStart initialize different values. 
*/ + void bindingPhase(bool); + void phaseStart(uint64_t u64Start); + bool isInBindingPhase() const; + /* returns 0 if in binding state */ + uint64_t issued() const; + + void setExpiration(uint32_t); + uint32_t getExpiration() const; + + RTNETADDRIPV4 getAddress() const; + void setAddress(RTNETADDRIPV4); + + const NetworkConfigEntity *getConfig() const; + void setConfig(NetworkConfigEntity *); + + const MapOptionId2RawOption& options() const; + + bool toXML(xml::ElementNode *) const; + bool fromXML(const xml::ElementNode *); + + public: + static const Lease NullLease; + + private: + Lease(ClientData *); + SharedPtr<ClientData> m; +}; + + +typedef std::vector<Client> VecClient; +typedef VecClient::iterator VecClientIterator; +typedef VecClient::const_iterator VecClientConstIterator; + +typedef std::vector<RTMAC> MacAddressContainer; +typedef MacAddressContainer::iterator MacAddressIterator; + +typedef std::vector<RTNETADDRIPV4> Ipv4AddressContainer; +typedef Ipv4AddressContainer::iterator Ipv4AddressIterator; +typedef Ipv4AddressContainer::const_iterator Ipv4AddressConstIterator; + +typedef std::map<Lease, RTNETADDRIPV4> MapLease2Ip4Address; +typedef MapLease2Ip4Address::iterator MapLease2Ip4AddressIterator; +typedef MapLease2Ip4Address::const_iterator MapLease2Ip4AddressConstIterator; +typedef MapLease2Ip4Address::value_type MapLease2Ip4AddressPair; + +/** + * + */ +class ClientMatchCriteria +{ + public: + virtual bool check(const Client&) const {return false;}; +}; + + +class ORClientMatchCriteria: ClientMatchCriteria +{ + ClientMatchCriteria* m_left; + ClientMatchCriteria* m_right; + ORClientMatchCriteria(ClientMatchCriteria *left, ClientMatchCriteria *right) + { + m_left = left; + m_right = right; + } + + virtual bool check(const Client& client) const + { + return (m_left->check(client) || m_right->check(client)); + } +}; + + +class ANDClientMatchCriteria: ClientMatchCriteria +{ +public: + ANDClientMatchCriteria(ClientMatchCriteria *left, ClientMatchCriteria 
*right) + { + m_left = left; + m_right = right; + } + + virtual bool check(const Client& client) const + { + return (m_left->check(client) && m_right->check(client)); + } + +private: + ClientMatchCriteria* m_left; + ClientMatchCriteria* m_right; + +}; + + +class AnyClientMatchCriteria: public ClientMatchCriteria +{ +public: + virtual bool check(const Client&) const + { + return true; + } +}; + + +class MACClientMatchCriteria: public ClientMatchCriteria +{ +public: + MACClientMatchCriteria(const RTMAC& mac):m_mac(mac){} + + virtual bool check(const Client& client) const; + +private: + RTMAC m_mac; +}; + + +#if 0 +/* XXX: Later */ +class VmSlotClientMatchCriteria: public ClientMatchCriteria +{ + str::string VmName; + uint8_t u8Slot; + virtual bool check(const Client& client) + { + return ( client.VmName == VmName + && ( u8Slot == (uint8_t)~0 /* any */ + || client.u8Slot == u8Slot)); + } +}; +#endif + + +/* Option 60 */ +class ClassClientMatchCriteria: ClientMatchCriteria{}; +/* Option 61 */ +class ClientIdentifierMatchCriteria: ClientMatchCriteria{}; + + +class BaseConfigEntity +{ + public: + BaseConfigEntity(const ClientMatchCriteria *criteria = NULL, + int matchingLevel = 0) + : m_criteria(criteria), + m_MatchLevel(matchingLevel){}; + virtual ~BaseConfigEntity(){}; + /* XXX */ + int add(BaseConfigEntity *cfg) + { + m_children.push_back(cfg); + return 0; + } + + /* Should return how strong matching */ + virtual int match(Client& client, BaseConfigEntity **cfg); + virtual uint32_t expirationPeriod() const = 0; + + protected: + const ClientMatchCriteria *m_criteria; + int m_MatchLevel; + std::vector<BaseConfigEntity *> m_children; +}; + + +class NullConfigEntity: public BaseConfigEntity +{ + public: + NullConfigEntity(){} + virtual ~NullConfigEntity(){} + int add(BaseConfigEntity *) const { return 0;} + virtual uint32_t expirationPeriod() const {return 0;} +}; + + +class ConfigEntity: public BaseConfigEntity +{ + public: + /* range */ + /* match conditions */ + 
ConfigEntity(std::string& name, + const BaseConfigEntity *cfg, + const ClientMatchCriteria *criteria, + int matchingLevel = 0): + BaseConfigEntity(criteria, matchingLevel), + m_name(name), + m_parentCfg(cfg), + m_u32ExpirationPeriod(0) + { + unconst(m_parentCfg)->add(this); + } + + virtual uint32_t expirationPeriod() const + { + if (!m_u32ExpirationPeriod) + return m_parentCfg->expirationPeriod(); + else + return m_u32ExpirationPeriod; + } + + /* XXX: private:*/ + std::string m_name; + const BaseConfigEntity *m_parentCfg; + uint32_t m_u32ExpirationPeriod; +}; + + +/** + * Network specific entries + */ +class NetworkConfigEntity:public ConfigEntity +{ +public: + /* Address Pool matching with network declaration */ + NetworkConfigEntity(std::string name, + const BaseConfigEntity *cfg, + const ClientMatchCriteria *criteria, + int matchlvl, + const RTNETADDRIPV4& networkID, + const RTNETADDRIPV4& networkMask, + const RTNETADDRIPV4& lowerIP, + const RTNETADDRIPV4& upperIP): + ConfigEntity(name, cfg, criteria, matchlvl), + m_NetworkID(networkID), + m_NetworkMask(networkMask), + m_UpperIP(upperIP), + m_LowerIP(lowerIP) + { + }; + + NetworkConfigEntity(std::string name, + const BaseConfigEntity *cfg, + const ClientMatchCriteria *criteria, + const RTNETADDRIPV4& networkID, + const RTNETADDRIPV4& networkMask): + ConfigEntity(name, cfg, criteria, 5), + m_NetworkID(networkID), + m_NetworkMask(networkMask) + { + m_UpperIP.u = m_NetworkID.u | (~m_NetworkMask.u); + m_LowerIP.u = m_NetworkID.u; + }; + + const RTNETADDRIPV4& upperIp() const {return m_UpperIP;} + const RTNETADDRIPV4& lowerIp() const {return m_LowerIP;} + const RTNETADDRIPV4& networkId() const {return m_NetworkID;} + const RTNETADDRIPV4& netmask() const {return m_NetworkMask;} + + private: + RTNETADDRIPV4 m_NetworkID; + RTNETADDRIPV4 m_NetworkMask; + RTNETADDRIPV4 m_UpperIP; + RTNETADDRIPV4 m_LowerIP; +}; + + +/** + * Host specific entry + * Address pool is contains one element + */ +class HostConfigEntity: public 
NetworkConfigEntity +{ +public: + HostConfigEntity(const RTNETADDRIPV4& addr, + std::string name, + const NetworkConfigEntity *cfg, + const ClientMatchCriteria *criteria): + NetworkConfigEntity(name, + static_cast<const ConfigEntity*>(cfg), criteria, 10, + cfg->networkId(), cfg->netmask(), addr, addr) + { + /* upper addr == lower addr */ + } +}; + +class RootConfigEntity: public NetworkConfigEntity +{ +public: + RootConfigEntity(std::string name, uint32_t expirationPeriod); + virtual ~RootConfigEntity(){}; +}; + + +#if 0 +/** + * Shared regions e.g. some of configured networks declarations + * are cover each other. + * XXX: Shared Network is join on Network config entities with possible + * overlaps in address pools. for a moment we won't configure and use them them + */ +class SharedNetworkConfigEntity: public NetworkEntity +{ +public: + SharedNetworkConfigEntity(){} + int match(const Client& client) const { return m_criteria.match(client)? 3 : 0;} + + SharedNetworkConfigEntity(NetworkEntity& network) + { + Networks.push_back(network); + } + virtual ~SharedNetworkConfigEntity(){} + + std::vector<NetworkConfigEntity> Networks; +}; +#endif + +class ConfigurationManager +{ +public: + static ConfigurationManager* getConfigurationManager(); + static int extractRequestList(PCRTNETBOOTP pDhcpMsg, size_t cbDhcpMsg, RawOption& rawOpt); + + int loadFromFile(const com::Utf8Str&); + int saveToFile(); + /** + * + */ + Client getClientByDhcpPacket(const RTNETBOOTP *pDhcpMsg, size_t cbDhcpMsg); + + /** + * XXX: it's could be done on DHCPOFFER or on DHCPACK (rfc2131 gives freedom here + * 3.1.2, what is strict that allocation should do address check before real + * allocation)... + */ + Lease allocateLease4Client(const Client& client, PCRTNETBOOTP pDhcpMsg, size_t cbDhcpMsg); + + /** + * We call this before DHCPACK sent and after DHCPREQUEST received ... + * when requested configuration is acceptable. 
+ */ + int commitLease4Client(Client& client); + + /** + * Expires client lease. + */ + int expireLease4Client(Client& client); + + static int findOption(uint8_t uOption, PCRTNETBOOTP pDhcpMsg, size_t cbDhcpMsg, RawOption& opt); + + NetworkConfigEntity *addNetwork(NetworkConfigEntity *pCfg, + const RTNETADDRIPV4& networkId, + const RTNETADDRIPV4& netmask, + RTNETADDRIPV4& UpperAddress, + RTNETADDRIPV4& LowerAddress); + + HostConfigEntity *addHost(NetworkConfigEntity*, const RTNETADDRIPV4&, ClientMatchCriteria*); + int addToAddressList(uint8_t u8OptId, RTNETADDRIPV4& address); + int flushAddressList(uint8_t u8OptId); + int setString(uint8_t u8OptId, const std::string& str); + const std::string& getString(uint8_t u8OptId); + const Ipv4AddressContainer& getAddressList(uint8_t u8OptId); + +private: + ConfigurationManager():m(NULL){} + void init(); + + ~ConfigurationManager(); + bool isAddressTaken(const RTNETADDRIPV4& addr, Lease& lease); + bool isAddressTaken(const RTNETADDRIPV4& addr); + +public: + /* nulls */ + const Ipv4AddressContainer m_empty; + const std::string m_noString; + +private: + struct Data; + Data *m; +}; + + +class NetworkManager +{ +public: + static NetworkManager *getNetworkManager(); + + const RTNETADDRIPV4& getOurAddress() const; + const RTNETADDRIPV4& getOurNetmask() const; + const RTMAC& getOurMac() const; + + void setOurAddress(const RTNETADDRIPV4& aAddress); + void setOurNetmask(const RTNETADDRIPV4& aNetmask); + void setOurMac(const RTMAC& aMac); + + bool handleDhcpReqDiscover(PCRTNETBOOTP pDhcpMsg, size_t cb); + bool handleDhcpReqRequest(PCRTNETBOOTP pDhcpMsg, size_t cb); + bool handleDhcpReqDecline(PCRTNETBOOTP pDhcpMsg, size_t cb); + bool handleDhcpReqRelease(PCRTNETBOOTP pDhcpMsg, size_t cb); + + void setService(const VBoxNetHlpUDPService *); +private: + NetworkManager(); + ~NetworkManager(); + + int offer4Client(const Client& lease, uint32_t u32Xid, uint8_t *pu8ReqList, int cReqList); + int ack(const Client& lease, uint32_t u32Xid, 
uint8_t *pu8ReqList, int cReqList); + int nak(const Client& lease, uint32_t u32Xid); + + int prepareReplyPacket4Client(const Client& client, uint32_t u32Xid); + int doReply(const Client& client, const std::vector<RawOption>& extra); + int processParameterReqList(const Client& client, const uint8_t *pu8ReqList, int cReqList, std::vector<RawOption>& extra); + +private: + struct Data; + Data *m; + +}; + + +extern const ClientMatchCriteria *g_AnyClient; +extern RootConfigEntity *g_RootConfig; +extern const NullConfigEntity *g_NullConfig; + +/** + * Helper class for stuffing DHCP options into a reply packet. + */ +class VBoxNetDhcpWriteCursor +{ +private: + uint8_t *m_pbCur; /**< The current cursor position. */ + uint8_t *m_pbEnd; /**< The end the current option space. */ + uint8_t *m_pfOverload; /**< Pointer to the flags of the overload option. */ + uint8_t m_fUsed; /**< Overload fields that have been used. */ + PRTNETDHCPOPT m_pOpt; /**< The current option. */ + PRTNETBOOTP m_pDhcp; /**< The DHCP packet. */ + bool m_fOverflowed; /**< Set if we've overflowed, otherwise false. */ + +public: + /** Instantiate an option cursor for the specified DHCP message. */ + VBoxNetDhcpWriteCursor(PRTNETBOOTP pDhcp, size_t cbDhcp) : + m_pbCur(&pDhcp->bp_vend.Dhcp.dhcp_opts[0]), + m_pbEnd((uint8_t *)pDhcp + cbDhcp), + m_pfOverload(NULL), + m_fUsed(0), + m_pOpt(NULL), + m_pDhcp(pDhcp), + m_fOverflowed(false) + { + AssertPtr(pDhcp); + Assert(cbDhcp > RT_UOFFSETOF(RTNETBOOTP, bp_vend.Dhcp.dhcp_opts[10])); + } + + /** Destructor. */ + ~VBoxNetDhcpWriteCursor() + { + m_pbCur = m_pbEnd = m_pfOverload = NULL; + m_pOpt = NULL; + m_pDhcp = NULL; + } + + /** + * Try use the bp_file field. + * @returns true if not overloaded, false otherwise. + */ + bool useBpFile(void) + { + if ( m_pfOverload + && (*m_pfOverload & 1)) + return false; + m_fUsed |= 1 /* bp_file flag*/; + return true; + } + + + /** + * Try overload more BOOTP fields + */ + bool overloadMore(void) + { + /* switch option area. 
*/ + uint8_t *pbNew; + uint8_t *pbNewEnd; + uint8_t fField; + if (!(m_fUsed & 1)) + { + fField = 1; + pbNew = &m_pDhcp->bp_file[0]; + pbNewEnd = &m_pDhcp->bp_file[sizeof(m_pDhcp->bp_file)]; + } + else if (!(m_fUsed & 2)) + { + fField = 2; + pbNew = &m_pDhcp->bp_sname[0]; + pbNewEnd = &m_pDhcp->bp_sname[sizeof(m_pDhcp->bp_sname)]; + } + else + return false; + + if (!m_pfOverload) + { + /* Add an overload option. */ + *m_pbCur++ = RTNET_DHCP_OPT_OPTION_OVERLOAD; + *m_pbCur++ = fField; + m_pfOverload = m_pbCur; + *m_pbCur++ = 1; /* bp_file flag */ + } + else + *m_pfOverload |= fField; + + /* pad current option field */ + while (m_pbCur != m_pbEnd) + *m_pbCur++ = RTNET_DHCP_OPT_PAD; /** @todo not sure if this stuff is at all correct... */ + + /* switch */ + m_pbCur = pbNew; + m_pbEnd = pbNewEnd; + return true; + } + + /** + * Begin an option. + * + * @returns true on success, false if we're out of space. + * + * @param uOption The option number. + * @param cb The amount of data. + */ + bool begin(uint8_t uOption, size_t cb) + { + /* Check that the data of the previous option has all been written. */ + Assert( !m_pOpt + || (m_pbCur - m_pOpt->dhcp_len == (uint8_t *)(m_pOpt + 1))); + AssertMsg(cb <= 255, ("%#x\n", cb)); + + /* Check if we need to overload more stuff. */ + if ((uintptr_t)(m_pbEnd - m_pbCur) < cb + 2 + (m_pfOverload ? 1 : 3)) + { + m_pOpt = NULL; + if (!overloadMore()) + { + m_fOverflowed = true; + AssertMsgFailedReturn(("%u %#x\n", uOption, cb), false); + } + if ((uintptr_t)(m_pbEnd - m_pbCur) < cb + 2 + 1) + { + m_fOverflowed = true; + AssertMsgFailedReturn(("%u %#x\n", uOption, cb), false); + } + } + + /* Emit the option header. */ + m_pOpt = (PRTNETDHCPOPT)m_pbCur; + m_pOpt->dhcp_opt = uOption; + m_pOpt->dhcp_len = (uint8_t)cb; + m_pbCur += 2; + return true; + } + + /** + * Puts option data. + * + * @param pvData The data. + * @param cb The amount to put. 
+ */ + void put(void const *pvData, size_t cb) + { + Assert(m_pOpt || m_fOverflowed); + if (RT_LIKELY(m_pOpt)) + { + Assert((uintptr_t)m_pbCur - (uintptr_t)(m_pOpt + 1) + cb <= (size_t)m_pOpt->dhcp_len); + memcpy(m_pbCur, pvData, cb); + m_pbCur += cb; + } + } + + /** + * Puts an IPv4 Address. + * + * @param IPv4Addr The address. + */ + void putIPv4Addr(RTNETADDRIPV4 IPv4Addr) + { + put(&IPv4Addr, 4); + } + + /** + * Adds an IPv4 address option. + * + * @returns true/false just like begin(). + * + * @param uOption The option number. + * @param IPv4Addr The address. + */ + bool optIPv4Addr(uint8_t uOption, RTNETADDRIPV4 IPv4Addr) + { + if (!begin(uOption, 4)) + return false; + putIPv4Addr(IPv4Addr); + return true; + } + + /** + * Adds an option taking 1 or more IPv4 address. + * + * If the vector contains no addresses, the option will not be added. + * + * @returns true/false just like begin(). + * + * @param uOption The option number. + * @param rIPv4Addrs Reference to the address vector. + */ + bool optIPv4Addrs(uint8_t uOption, std::vector<RTNETADDRIPV4> const &rIPv4Addrs) + { + size_t const c = rIPv4Addrs.size(); + if (!c) + return true; + + if (!begin(uOption, 4*c)) + return false; + for (size_t i = 0; i < c; i++) + putIPv4Addr(rIPv4Addrs[i]); + return true; + } + + /** + * Puts an 8-bit integer. + * + * @param u8 The integer. + */ + void putU8(uint8_t u8) + { + put(&u8, 1); + } + + /** + * Adds an 8-bit integer option. + * + * @returns true/false just like begin(). + * + * @param uOption The option number. + * @param u8 The integer + */ + bool optU8(uint8_t uOption, uint8_t u8) + { + if (!begin(uOption, 1)) + return false; + putU8(u8); + return true; + } + + /** + * Puts an 32-bit integer (network endian). + * + * @param u32Network The integer. + */ + void putU32(uint32_t u32) + { + put(&u32, 4); + } + + /** + * Adds an 32-bit integer (network endian) option. + * + * @returns true/false just like begin(). + * + * @param uOption The option number. 
+ * @param u32Network The integer. + */ + bool optU32(uint8_t uOption, uint32_t u32) + { + if (!begin(uOption, 4)) + return false; + putU32(u32); + return true; + } + + /** + * Puts a std::string. + * + * @param rStr Reference to the string. + */ + void putStr(std::string const &rStr) + { + put(rStr.c_str(), rStr.size()); + } + + /** + * Adds an std::string option if the string isn't empty. + * + * @returns true/false just like begin(). + * + * @param uOption The option number. + * @param rStr Reference to the string. + */ + bool optStr(uint8_t uOption, std::string const &rStr) + { + const size_t cch = rStr.size(); + if (!cch) + return true; + + if (!begin(uOption, cch)) + return false; + put(rStr.c_str(), cch); + return true; + } + + /** + * Whether we've overflowed. + * + * @returns true on overflow, false otherwise. + */ + bool hasOverflowed(void) const + { + return m_fOverflowed; + } + + /** + * Adds the terminating END option. + * + * The END will always be added as we're reserving room for it, however, we + * might have dropped previous options due to overflows and that is what the + * return status indicates. + * + * @returns true on success, false on a (previous) overflow. 
+ */ + bool optEnd(void) + { + Assert((uintptr_t)(m_pbEnd - m_pbCur) < 4096); + *m_pbCur++ = RTNET_DHCP_OPT_END; + return !hasOverflowed(); + } +}; + +#endif diff --git a/src/VBox/NetworkServices/DHCP/Makefile.kmk b/src/VBox/NetworkServices/DHCP/Makefile.kmk index f04b6285..623b9d92 100644 --- a/src/VBox/NetworkServices/DHCP/Makefile.kmk +++ b/src/VBox/NetworkServices/DHCP/Makefile.kmk @@ -40,17 +40,18 @@ VBoxNetDHCPHardened_NAME = VBoxNetDHCP # # VBoxNetDHCP # -VBoxNetDHCP_TEMPLATE = -VBoxNetDHCP_TEMPLATE := VBOXR3$(if-expr defined(VBOX_WITH_HARDENING),,EXE) +VBoxNetDHCP_TEMPLATE := VBOX$(if-expr defined(VBOX_WITH_HARDENING),MAINDLL,MAINCLIENTEXE) VBoxNetDHCP_SOURCES = \ VBoxNetDHCP.cpp \ + Config.cpp \ + NetworkManagerDhcp.cpp \ ../NetLib/VBoxNetIntIf.cpp \ ../NetLib/VBoxNetUDP.cpp \ - ../NetLib/VBoxNetARP.cpp + ../NetLib/VBoxNetARP.cpp \ + ../NetLib/VBoxNetBaseService.cpp \ + ../NetLib/ComHostUtils.cpp VBoxNetDHCP_LIBS = \ $(LIB_RUNTIME) VBoxNetDHCP_LDFLAGS.win = /SUBSYSTEM:windows - include $(FILE_KBUILD_SUB_FOOTER) - diff --git a/src/VBox/NetworkServices/DHCP/NetworkManagerDhcp.cpp b/src/VBox/NetworkServices/DHCP/NetworkManagerDhcp.cpp new file mode 100644 index 00000000..415e635c --- /dev/null +++ b/src/VBox/NetworkServices/DHCP/NetworkManagerDhcp.cpp @@ -0,0 +1,188 @@ +/* $Id: NetworkManagerDhcp.cpp $ */ +/** @file + * NetworkManagerDhcp - Network Manager part handling Dhcp. + */ + +/* + * Copyright (C) 2013 Oracle Corporation + * + * This file is part of VirtualBox Open Source Edition (OSE), as + * available from http://www.virtualbox.org. This file is free software; + * you can redistribute it and/or modify it under the terms of the GNU + * General Public License (GPL) as published by the Free Software + * Foundation, in version 2 as it comes in the "COPYING" file of the + * VirtualBox OSE distribution. VirtualBox OSE is distributed in the + * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind. 
+ */ + +/******************************************************************************* +* Header Files * +*******************************************************************************/ +#include <iprt/asm.h> +#include <iprt/cdefs.h> +#include <iprt/getopt.h> +#include <iprt/net.h> +#include <iprt/param.h> +#include <iprt/path.h> +#include <iprt/stream.h> +#include <iprt/time.h> +#include <iprt/string.h> + +#include "../NetLib/shared_ptr.h" + +#include <vector> +#include <list> +#include <string> +#include <map> + +#include <VBox/sup.h> +#include <VBox/intnet.h> + +#define BASE_SERVICES_ONLY +#include "../NetLib/VBoxNetBaseService.h" +#include "Config.h" +#include "ClientDataInt.h" + +/** + * The client is requesting an offer. + * + * @returns true. + * + * @param pDhcpMsg The message. + * @param cb The message size. + */ +bool NetworkManager::handleDhcpReqDiscover(PCRTNETBOOTP pDhcpMsg, size_t cb) +{ + RawOption opt; + memset(&opt, 0, sizeof(RawOption)); + /* 1. Find client */ + ConfigurationManager *confManager = ConfigurationManager::getConfigurationManager(); + Client client = confManager->getClientByDhcpPacket(pDhcpMsg, cb); + + /* 2. Find/Bind lease for client */ + Lease lease = confManager->allocateLease4Client(client, pDhcpMsg, cb); + AssertReturn(lease != Lease::NullLease, VINF_SUCCESS); + + int rc = ConfigurationManager::extractRequestList(pDhcpMsg, cb, opt); + + /* 3. Send of offer */ + + lease.bindingPhase(true); + lease.phaseStart(RTTimeMilliTS()); + lease.setExpiration(300); /* 3 min. */ + offer4Client(client, pDhcpMsg->bp_xid, opt.au8RawOpt, opt.cbRawOpt); + + return VINF_SUCCESS; +} + + +/** + * The client is requesting an offer. + * + * @returns true. + * + * @param pDhcpMsg The message. + * @param cb The message size. + */ +bool NetworkManager::handleDhcpReqRequest(PCRTNETBOOTP pDhcpMsg, size_t cb) +{ + ConfigurationManager *confManager = ConfigurationManager::getConfigurationManager(); + + /* 1. 
find client */ + Client client = confManager->getClientByDhcpPacket(pDhcpMsg, cb); + + /* 2. find bound lease */ + Lease l = client.lease(); + if (l != Lease::NullLease) + { + + if (l.isExpired()) + { + /* send client to INIT state */ + Client c(client); + nak(client, pDhcpMsg->bp_xid); + confManager->expireLease4Client(c); + return true; + } + else { + /* XXX: Validate request */ + RawOption opt; + RT_ZERO(opt); + + Client c(client); + int rc = confManager->commitLease4Client(c); + AssertRCReturn(rc, false); + + rc = ConfigurationManager::extractRequestList(pDhcpMsg, cb, opt); + AssertRCReturn(rc, false); + + ack(client, pDhcpMsg->bp_xid, opt.au8RawOpt, opt.cbRawOpt); + } + } + else + { + nak(client, pDhcpMsg->bp_xid); + } + return true; +} + + +/** + * The client is declining an offer we've made. + * + * @returns true. + * + * @param pDhcpMsg The message. + * @param cb The message size. + */ +bool NetworkManager::handleDhcpReqDecline(PCRTNETBOOTP, size_t) +{ + /** @todo Probably need to match the server IP here to work correctly with + * other servers. */ + + /* + * The client is supposed to pass us option 50, requested address, + * from the offer. We also match the lease state. Apparently the + * MAC address is not supposed to be checked here. + */ + + /** @todo this is not required in the initial implementation, do it later. */ + return true; +} + + +/** + * The client is releasing its lease - good boy. + * + * @returns true. + * + * @param pDhcpMsg The message. + * @param cb The message size. + */ +bool NetworkManager::handleDhcpReqRelease(PCRTNETBOOTP, size_t) +{ + /** @todo Probably need to match the server IP here to work correctly with + * other servers. */ + + /* + * The client may pass us option 61, client identifier, which we should + * use to find the lease by. + * + * We're matching MAC address and lease state as well. 
+ */ + + /* + * If no client identifier or if we couldn't find a lease by using it, + * we will try look it up by the client IP address. + */ + + + /* + * If found, release it. + */ + + + /** @todo this is not required in the initial implementation, do it later. */ + return true; +} + diff --git a/src/VBox/NetworkServices/DHCP/VBoxNetDHCP.cpp b/src/VBox/NetworkServices/DHCP/VBoxNetDHCP.cpp index 2ea54b66..6496b8ac 100644 --- a/src/VBox/NetworkServices/DHCP/VBoxNetDHCP.cpp +++ b/src/VBox/NetworkServices/DHCP/VBoxNetDHCP.cpp @@ -4,7 +4,7 @@ */ /* - * Copyright (C) 2009 Oracle Corporation + * Copyright (C) 2009-2011 Oracle Corporation * * This file is part of VirtualBox Open Source Edition (OSE), as * available from http://www.virtualbox.org. This file is free software; @@ -24,6 +24,16 @@ /******************************************************************************* * Header Files * *******************************************************************************/ +#include <VBox/com/com.h> +#include <VBox/com/listeners.h> +#include <VBox/com/string.h> +#include <VBox/com/Guid.h> +#include <VBox/com/array.h> +#include <VBox/com/ErrorInfo.h> +#include <VBox/com/errorprint.h> +#include <VBox/com/EventQueue.h> +#include <VBox/com/VirtualBox.h> + #include <iprt/alloca.h> #include <iprt/buildconfig.h> #include <iprt/err.h> @@ -43,383 +53,102 @@ #include <VBox/vmm/vmm.h> #include <VBox/version.h> + #include "../NetLib/VBoxNetLib.h" +#include "../NetLib/shared_ptr.h" #include <vector> +#include <list> #include <string> +#include <map> + +#include "../NetLib/VBoxNetBaseService.h" +#include "../NetLib/utils.h" #ifdef RT_OS_WINDOWS /* WinMain */ # include <Windows.h> # include <stdlib.h> +# ifdef INET_ADDRSTRLEN +/* On Windows INET_ADDRSTRLEN defined as 22 Ws2ipdef.h, because it include port number */ +# undef INET_ADDRSTRLEN +# endif +# define INET_ADDRSTRLEN 16 +#else +# include <netinet/in.h> #endif +#include "Config.h" 
/******************************************************************************* * Structures and Typedefs * *******************************************************************************/ - -/** - * DHCP configuration item. - * - * This is all public data because I'm too lazy to do it properly right now. - */ -class VBoxNetDhcpCfg -{ -public: - /** The etheret addresses this matches config applies to. - * An empty vector means 'ANY'. */ - std::vector<RTMAC> m_MacAddresses; - /** The upper address in the range. */ - RTNETADDRIPV4 m_UpperAddr; - /** The lower address in the range. */ - RTNETADDRIPV4 m_LowerAddr; - - /** Option 1: The net mask. */ - RTNETADDRIPV4 m_SubnetMask; - /* * Option 2: The time offset. */ - /** Option 3: Routers for the subnet. */ - std::vector<RTNETADDRIPV4> m_Routers; - /* * Option 4: Time server. */ - /* * Option 5: Name server. */ - /** Option 6: Domain Name Server (DNS) */ - std::vector<RTNETADDRIPV4> m_DNSes; - /* * Option 7: Log server. */ - /* * Option 8: Cookie server. */ - /* * Option 9: LPR server. */ - /* * Option 10: Impress server. */ - /* * Option 11: Resource location server. */ - /* * Option 12: Host name. */ - std::string m_HostName; - /* * Option 13: Boot file size option. */ - /* * Option 14: Merit dump file. */ - /** Option 15: Domain name. */ - std::string m_DomainName; - /* * Option 16: Swap server. */ - /* * Option 17: Root path. */ - /* * Option 18: Extension path. */ - /* * Option 19: IP forwarding enable/disable. */ - /* * Option 20: Non-local routing enable/disable. */ - /* * Option 21: Policy filter. */ - /* * Option 22: Maximum datagram reassembly size (MRS). */ - /* * Option 23: Default IP time-to-live. */ - /* * Option 24: Path MTU aging timeout. */ - /* * Option 25: Path MTU plateau table. */ - /* * Option 26: Interface MTU. */ - /* * Option 27: All subnets are local. */ - /* * Option 28: Broadcast address. */ - /* * Option 29: Perform maximum discovery. */ - /* * Option 30: Mask supplier. 
*/ - /* * Option 31: Perform route discovery. */ - /* * Option 32: Router solicitation address. */ - /* * Option 33: Static route. */ - /* * Option 34: Trailer encapsulation. */ - /* * Option 35: ARP cache timeout. */ - /* * Option 36: Ethernet encapsulation. */ - /* * Option 37: TCP Default TTL. */ - /* * Option 38: TCP Keepalive Interval. */ - /* * Option 39: TCP Keepalive Garbage. */ - /* * Option 40: Network Information Service (NIS) Domain. */ - /* * Option 41: Network Information Servers. */ - /* * Option 42: Network Time Protocol Servers. */ - /* * Option 43: Vendor Specific Information. */ - /* * Option 44: NetBIOS over TCP/IP Name Server (NBNS). */ - /* * Option 45: NetBIOS over TCP/IP Datagram distribution Server (NBDD). */ - /* * Option 46: NetBIOS over TCP/IP Node Type. */ - /* * Option 47: NetBIOS over TCP/IP Scope. */ - /* * Option 48: X Window System Font Server. */ - /* * Option 49: X Window System Display Manager. */ - - /** Option 51: IP Address Lease Time. */ - uint32_t m_cSecLease; - - /* * Option 64: Network Information Service+ Domain. */ - /* * Option 65: Network Information Service+ Servers. */ - /** Option 66: TFTP server name. */ - std::string m_TftpServer; - /** Address for the bp_siaddr field corresponding to m_TftpServer. */ - RTNETADDRIPV4 m_TftpServerAddr; - /** Option 67: Bootfile name. */ - std::string m_BootfileName; - - /* * Option 68: Mobile IP Home Agent. */ - /* * Option 69: Simple Mail Transport Protocol (SMPT) Server. */ - /* * Option 70: Post Office Protocol (POP3) Server. */ - /* * Option 71: Network News Transport Protocol (NNTP) Server. */ - /* * Option 72: Default World Wide Web (WWW) Server. */ - /* * Option 73: Default Finger Server. */ - /* * Option 74: Default Internet Relay Chat (IRC) Server. */ - /* * Option 75: StreetTalk Server. */ - - /* * Option 119: Domain Search. 
*/ - - - VBoxNetDhcpCfg() - { - m_UpperAddr.u = UINT32_MAX; - m_LowerAddr.u = UINT32_MAX; - m_SubnetMask.u = UINT32_MAX; - m_cSecLease = 60*60; /* 1 hour */ - } - - /** Validates the configuration. - * @returns 0 on success, exit code + error message to stderr on failure. */ - int validate(void) - { - if ( m_UpperAddr.u == UINT32_MAX - || m_LowerAddr.u == UINT32_MAX - || m_SubnetMask.u == UINT32_MAX) - { - RTStrmPrintf(g_pStdErr, "VBoxNetDHCP: Config is missing:"); - if (m_UpperAddr.u == UINT32_MAX) - RTStrmPrintf(g_pStdErr, " --upper-ip"); - if (m_LowerAddr.u == UINT32_MAX) - RTStrmPrintf(g_pStdErr, " --lower-ip"); - if (m_SubnetMask.u == UINT32_MAX) - RTStrmPrintf(g_pStdErr, " --netmask"); - return 2; - } - - if (RT_N2H_U32(m_UpperAddr.u) < RT_N2H_U32(m_LowerAddr.u)) - { - RTStrmPrintf(g_pStdErr, "VBoxNetDHCP: The --upper-ip value is lower than the --lower-ip one!\n" - " %d.%d.%d.%d < %d.%d.%d.%d\n", - m_UpperAddr.au8[0], m_UpperAddr.au8[1], m_UpperAddr.au8[2], m_UpperAddr.au8[3], - m_LowerAddr.au8[0], m_LowerAddr.au8[1], m_LowerAddr.au8[2], m_LowerAddr.au8[3]); - return 3; - } - - /* the code goes insane if we have too many atm. lazy bird */ - uint32_t cIPs = RT_N2H_U32(m_UpperAddr.u) - RT_N2H_U32(m_LowerAddr.u); - if (cIPs > 1024) - { - RTStrmPrintf(g_pStdErr, "VBoxNetDHCP: Too many IPs between --upper-ip and --lower-ip! %d (max 1024)\n" - " %d.%d.%d.%d < %d.%d.%d.%d\n", - cIPs, - m_UpperAddr.au8[0], m_UpperAddr.au8[1], m_UpperAddr.au8[2], m_UpperAddr.au8[3], - m_LowerAddr.au8[0], m_LowerAddr.au8[1], m_LowerAddr.au8[2], m_LowerAddr.au8[3]); - return 3; - } - return 0; - } - - /** - * Is this config for one specific client? - * - * @return true / false. - */ - bool isOneSpecificClient(void) const - { - return m_LowerAddr.u == m_UpperAddr.u - && m_MacAddresses.size() > 0; - } - - /** - * Checks if this config matches the specified MAC address. - * - * @returns true / false. - * - * @param pMac The MAC address to match. 
- */ - bool matchesMacAddress(PCRTMAC pMac) const - { - size_t i = m_MacAddresses.size(); - if (RT_LIKELY(i < 1)) - return true; /* no entries == ALL wildcard match */ - - while (i--) - { - PCRTMAC pCur = &m_MacAddresses[i]; - if ( pCur->au16[0] == pMac->au16[0] - && pCur->au16[1] == pMac->au16[1] - && pCur->au16[2] == pMac->au16[2]) - return true; - } - return false; - } - -}; - -/** - * DHCP lease. - */ -class VBoxNetDhcpLease -{ -public: - typedef enum State - { - /** Invalid. */ - kState_Invalid = 0, - /** The lease is free / released. */ - kState_Free, - /** An offer has been made. - * Expire time indicates when the offer expires. */ - kState_Offer, - /** The lease is active. - * Expire time indicates when the lease expires. */ - kState_Active - } State; - - /** The client MAC address. */ - RTMAC m_MacAddress; - /** The IPv4 address. */ - RTNETADDRIPV4 m_IPv4Address; - - /** The current lease state. */ - State m_enmState; - /** The lease expiration time. */ - RTTIMESPEC m_ExpireTime; - /** Transaction ID. */ - uint32_t m_xid; - /** The configuration for this lease. */ - VBoxNetDhcpCfg *m_pCfg; - -public: - /** Constructor taking an IPv4 address and a Config. */ - VBoxNetDhcpLease(RTNETADDRIPV4 IPv4Addr, VBoxNetDhcpCfg *pCfg) - { - m_pCfg = pCfg; - m_IPv4Address = IPv4Addr; - - m_MacAddress.au16[0] = m_MacAddress.au16[1] = m_MacAddress.au16[2] = 0xff; - m_enmState = kState_Free; - RTTimeSpecSetSeconds(&m_ExpireTime, 0); - m_xid = UINT32_MAX; - } - - /** Destructor. */ - ~VBoxNetDhcpLease() - { - m_IPv4Address.u = UINT32_MAX; - m_pCfg = NULL; - m_MacAddress.au16[0] = m_MacAddress.au16[1] = m_MacAddress.au16[2] = 0xff; - m_enmState = kState_Free; - m_xid = UINT32_MAX; - } - - void offer(uint32_t xid); - void activate(void); - void activate(uint32_t xid); - void release(void); - bool hasExpired(void) const; - - /** - * Checks if the lease is in use or not. - * - * @returns true if active, false if free or expired. - * - * @param pNow The current time to use. 
Optional. - */ - bool isInUse(PCRTTIMESPEC pNow = NULL) const - { - if ( m_enmState == kState_Offer - || m_enmState == kState_Active) - { - RTTIMESPEC Now; - if (!pNow) - pNow = RTTimeNow(&Now); - return RTTimeSpecGetSeconds(&m_ExpireTime) > RTTimeSpecGetSeconds(pNow); - } - return false; - } - - /** - * Is this lease for one specific client? - * - * @return true/false. - */ - bool isOneSpecificClient(void) const - { - return m_pCfg - && m_pCfg->isOneSpecificClient(); - } - - /** - * Is this lease currently being offered to a client. - * - * @returns true / false. - */ - bool isBeingOffered(void) const - { - return m_enmState == kState_Offer - && isInUse(); - } - - /** - * Is the lease in the current config or not. - * - * When updating the config we might leave active leases behind which aren't - * included in the new config. These will have m_pCfg set to NULL and should be - * freed up when they expired. - * - * @returns true / false. - */ - bool isInCurrentConfig(void) const - { - return m_pCfg != NULL; - } -}; - /** * DHCP server instance. 
*/ -class VBoxNetDhcp +class VBoxNetDhcp: public VBoxNetBaseService, public NATNetworkEventAdapter { public: VBoxNetDhcp(); virtual ~VBoxNetDhcp(); - int parseArgs(int argc, char **argv); - int tryGoOnline(void); - int run(void); + int init(); + void usage(void) { /* XXX: document options */ }; + int parseOpt(int rc, const RTGETOPTUNION& getOptVal); + int processFrame(void *, size_t) {return VERR_IGNORED; }; + int processGSO(PCPDMNETWORKGSO, size_t) {return VERR_IGNORED; }; + int processUDP(void *, size_t); protected: - int addConfig(VBoxNetDhcpCfg *pCfg); - void explodeConfig(void); - bool handleDhcpMsg(uint8_t uMsgType, PCRTNETBOOTP pDhcpMsg, size_t cb); bool handleDhcpReqDiscover(PCRTNETBOOTP pDhcpMsg, size_t cb); bool handleDhcpReqRequest(PCRTNETBOOTP pDhcpMsg, size_t cb); bool handleDhcpReqDecline(PCRTNETBOOTP pDhcpMsg, size_t cb); bool handleDhcpReqRelease(PCRTNETBOOTP pDhcpMsg, size_t cb); - void makeDhcpReply(uint8_t uMsgType, VBoxNetDhcpLease *pLease, PCRTNETBOOTP pDhcpMsg, size_t cb); - VBoxNetDhcpLease *findLeaseByMacAddress(PCRTMAC pMacAddress, bool fAnyState); - VBoxNetDhcpLease *findLeaseByIpv4AndMacAddresses(RTNETADDRIPV4 IPv4Addr, PCRTMAC pMacAddress, bool fAnyState); - VBoxNetDhcpLease *newLease(PCRTNETBOOTP pDhcpMsg, size_t cb); - - static uint8_t const *findOption(uint8_t uOption, PCRTNETBOOTP pDhcpMsg, size_t cb, size_t *pcbMaxOpt); - static bool findOptionIPv4Addr(uint8_t uOption, PCRTNETBOOTP pDhcpMsg, size_t cb, PRTNETADDRIPV4 pIPv4Addr); - - inline void debugPrint( int32_t iMinLevel, bool fMsg, const char *pszFmt, ...) const; void debugPrintV(int32_t iMinLevel, bool fMsg, const char *pszFmt, va_list va) const; static const char *debugDhcpName(uint8_t uMsgType); +private: + int initNoMain(); + int initWithMain(); + HRESULT HandleEvent(VBoxEventType_T aEventType, IEvent *pEvent); + int fetchAndUpdateDnsInfo(); + protected: - /** @name The server configuration data members. + /** @name The DHCP server specific configuration data members. 
* @{ */ - std::string m_Name; - std::string m_Network; - std::string m_TrunkName; - INTNETTRUNKTYPE m_enmTrunkType; - RTMAC m_MacAddress; - RTNETADDRIPV4 m_Ipv4Address; + /* + * XXX: what was the plan? SQL3 or plain text file? + * How it will coexists with managment from VBoxManagement, who should manage db + * in that case (VBoxManage, VBoxSVC ???) + */ std::string m_LeaseDBName; + /** @} */ - /** The current configs. */ - std::vector<VBoxNetDhcpCfg *> m_Cfgs; + /* corresponding dhcp server description in Main */ + ComPtr<IDHCPServer> m_DhcpServer; - /** The current leases. */ - std::vector<VBoxNetDhcpLease> m_Leases; + ComPtr<INATNetwork> m_NATNetwork; - /** @name The network interface - * @{ */ - PSUPDRVSESSION m_pSession; - uint32_t m_cbSendBuf; - uint32_t m_cbRecvBuf; - INTNETIFHANDLE m_hIf; /**< The handle to the network interface. */ - PINTNETBUF m_pIfBuf; /**< Interface buffer. */ - /** @} */ + /** Listener for Host DNS changes */ + ComPtr<NATNetworkListenerImpl> m_vboxListener; + /* + * We will ignore cmd line parameters IFF there will be some DHCP specific arguments + * otherwise all paramters will come from Main. + */ + bool m_fIgnoreCmdLineParameters; + + /* + * -b -n 10.0.1.2 -m 255.255.255.0 -> to the list processing in + */ + typedef struct + { + char Key; + std::string strValue; + } CMDLNPRM; + std::list<CMDLNPRM> CmdParameterll; + typedef std::list<CMDLNPRM>::iterator CmdParameterIterator; /** @name Debug stuff * @{ */ @@ -432,130 +161,68 @@ protected: }; -/******************************************************************************* -* Global Variables * -*******************************************************************************/ -/** Pointer to the DHCP server. */ -static VBoxNetDhcp *g_pDhcp; - - -/** - * Offer this lease to a client. - * - * @param xid The transaction ID. 
- */ -void VBoxNetDhcpLease::offer(uint32_t xid) +static inline int configGetBoundryAddress(const ComDhcpServerPtr& dhcp, bool fUpperBoundry, RTNETADDRIPV4& boundryAddress) { - m_enmState = kState_Offer; - m_xid = xid; - RTTimeNow(&m_ExpireTime); - RTTimeSpecAddSeconds(&m_ExpireTime, 60); -} + boundryAddress.u = INADDR_ANY; + HRESULT hrc; + com::Bstr strAddress; + if (fUpperBoundry) + hrc = dhcp->COMGETTER(UpperIP)(strAddress.asOutParam()); + else + hrc = dhcp->COMGETTER(LowerIP)(strAddress.asOutParam()); + AssertComRCReturn(hrc, VERR_INTERNAL_ERROR); -/** - * Activate this lease (i.e. a client is now using it). - */ -void VBoxNetDhcpLease::activate(void) -{ - m_enmState = kState_Active; - RTTimeNow(&m_ExpireTime); - RTTimeSpecAddSeconds(&m_ExpireTime, m_pCfg ? m_pCfg->m_cSecLease : 60); /* m_pCfg can be NULL right now... */ + return RTNetStrToIPv4Addr(com::Utf8Str(strAddress).c_str(), &boundryAddress); } +/******************************************************************************* +* Global Variables * +*******************************************************************************/ +/** Pointer to the DHCP server. */ +static VBoxNetDhcp *g_pDhcp; -/** - * Activate this lease with a new transaction ID. - * - * @param xid The transaction ID. - * @todo check if this is really necessary. - */ -void VBoxNetDhcpLease::activate(uint32_t xid) +/* DHCP server specific options */ +static RTGETOPTDEF g_aOptionDefs[] = { - activate(); - m_xid = xid; -} - - -/** - * Release a lease either upon client request or because it didn't quite match a - * DHCP_REQUEST. 
- */ -void VBoxNetDhcpLease::release(void) -{ - m_enmState = kState_Free; - RTTimeNow(&m_ExpireTime); - RTTimeSpecAddSeconds(&m_ExpireTime, 5); -} - + { "--lease-db", 'D', RTGETOPT_REQ_STRING }, + { "--begin-config", 'b', RTGETOPT_REQ_NOTHING }, + { "--gateway", 'g', RTGETOPT_REQ_IPV4ADDR }, + { "--lower-ip", 'l', RTGETOPT_REQ_IPV4ADDR }, + { "--upper-ip", 'u', RTGETOPT_REQ_IPV4ADDR }, +}; /** - * Checks if the lease has expired or not. - * - * This just checks the expiration time not the state. This is so that this - * method will work for reusing RELEASEd leases when the client comes back after - * a reboot or ipconfig /renew. Callers not interested in info on released - * leases should check the state first. - * - * @returns true if expired, false if not. + * Construct a DHCP server with a default configuration. */ -bool VBoxNetDhcpLease::hasExpired() const +VBoxNetDhcp::VBoxNetDhcp():VBoxNetBaseService("VBoxNetDhcp", "VBoxNetDhcp") { - RTTIMESPEC Now; - return RTTimeSpecGetSeconds(&m_ExpireTime) > RTTimeSpecGetSeconds(RTTimeNow(&Now)); -} - + /* m_enmTrunkType = kIntNetTrunkType_WhateverNone; */ + RTMAC mac; + mac.au8[0] = 0x08; + mac.au8[1] = 0x00; + mac.au8[2] = 0x27; + mac.au8[3] = 0x40; + mac.au8[4] = 0x41; + mac.au8[5] = 0x42; + setMacAddress(mac); + RTNETADDRIPV4 address; + address.u = RT_H2N_U32_C(RT_BSWAP_U32_C(RT_MAKE_U32_FROM_U8( 10, 0, 2, 5))); + setIpv4Address(address); + setSendBufSize(8 * _1K); + setRecvBufSize(50 * _1K); -/** - * Construct a DHCP server with a default configuration. 
- */ -VBoxNetDhcp::VBoxNetDhcp() -{ - m_Name = "VBoxNetDhcp"; - m_Network = "VBoxNetDhcp"; - m_TrunkName = ""; - m_enmTrunkType = kIntNetTrunkType_WhateverNone; - m_MacAddress.au8[0] = 0x08; - m_MacAddress.au8[1] = 0x00; - m_MacAddress.au8[2] = 0x27; - m_MacAddress.au8[3] = 0x40; - m_MacAddress.au8[4] = 0x41; - m_MacAddress.au8[5] = 0x42; - m_Ipv4Address.u = RT_H2N_U32_C(RT_BSWAP_U32_C(RT_MAKE_U32_FROM_U8( 10, 0, 2, 5))); - - m_pSession = NIL_RTR0PTR; - m_cbSendBuf = 8192; - m_cbRecvBuf = 51200; /** @todo tune to 64 KB with help from SrvIntR0 */ - m_hIf = INTNET_HANDLE_INVALID; - m_pIfBuf = NULL; - - m_cVerbosity = 0; m_uCurMsgType = UINT8_MAX; m_cbCurMsg = 0; m_pCurMsg = NULL; memset(&m_CurHdrs, '\0', sizeof(m_CurHdrs)); -#if 0 /* enable to hack the code without a mile long argument list. */ - VBoxNetDhcpCfg *pDefCfg = new VBoxNetDhcpCfg(); - pDefCfg->m_LowerAddr.u = RT_H2N_U32_C(RT_BSWAP_U32_C(RT_MAKE_U32_FROM_U8( 10, 0, 2,100))); - pDefCfg->m_UpperAddr.u = RT_H2N_U32_C(RT_BSWAP_U32_C(RT_MAKE_U32_FROM_U8( 10, 0, 2,250))); - pDefCfg->m_SubnetMask.u = RT_H2N_U32_C(RT_BSWAP_U32_C(RT_MAKE_U32_FROM_U8(255,255,255, 0))); - RTNETADDRIPV4 Addr; - Addr.u = RT_H2N_U32_C(RT_BSWAP_U32_C(RT_MAKE_U32_FROM_U8( 10, 0, 2, 1))); - pDefCfg->m_Routers.push_back(Addr); - Addr.u = RT_H2N_U32_C(RT_BSWAP_U32_C(RT_MAKE_U32_FROM_U8( 10, 0, 2, 2))); - pDefCfg->m_DNSes.push_back(Addr); - pDefCfg->m_DomainName = "vboxnetdhcp.org"; -#if 0 - pDefCfg->m_cSecLease = 60*60; /* 1 hour */ -#else - pDefCfg->m_cSecLease = 30; /* sec */ -#endif - pDefCfg->m_TftpServer = "10.0.2.3"; //?? - this->addConfig(pDefCfg); -#endif + m_fIgnoreCmdLineParameters = true; + + for(unsigned int i = 0; i < RT_ELEMENTS(g_aOptionDefs); ++i) + addCommandLineOption(&g_aOptionDefs[i]); } @@ -564,463 +231,101 @@ VBoxNetDhcp::VBoxNetDhcp() */ VBoxNetDhcp::~VBoxNetDhcp() { - /* - * Close the interface connection. 
- */ - if (m_hIf != INTNET_HANDLE_INVALID) - { - INTNETIFCLOSEREQ CloseReq; - CloseReq.Hdr.u32Magic = SUPVMMR0REQHDR_MAGIC; - CloseReq.Hdr.cbReq = sizeof(CloseReq); - CloseReq.pSession = m_pSession; - CloseReq.hIf = m_hIf; - m_hIf = INTNET_HANDLE_INVALID; - int rc = SUPR3CallVMMR0Ex(NIL_RTR0PTR, NIL_VMCPUID, VMMR0_DO_INTNET_IF_CLOSE, 0, &CloseReq.Hdr); - AssertRC(rc); - } - - if (m_pSession) - { - SUPR3Term(false /*fForced*/); - m_pSession = NIL_RTR0PTR; - } } -/** - * Adds a config to the tail. - * - * @returns See VBoxNetDHCP::validate(). - * @param pCfg The config too add. - * This object will be consumed by this call! - */ -int VBoxNetDhcp::addConfig(VBoxNetDhcpCfg *pCfg) -{ - int rc = 0; - if (pCfg) - { - rc = pCfg->validate(); - if (!rc) - m_Cfgs.push_back(pCfg); - else - delete pCfg; - } - return rc; -} /** - * Explodes the config into leases. - * - * @remarks This code is brute force and not very fast nor memory efficient. - * We will have to revisit this later. + * Parse the DHCP specific arguments. * - * @remarks If an IP has been reconfigured for a fixed mac address and it's - * already leased to a client, we it won't be available until the - * client releases its lease or it expires. + * This callback caled for each paramenter so + * .... + * we nee post analisys of the parameters, at least + * for -b, -g, -l, -u, -m */ -void VBoxNetDhcp::explodeConfig(void) +int VBoxNetDhcp::parseOpt(int rc, const RTGETOPTUNION& Val) { - RTTIMESPEC Now; - RTTimeNow(&Now); + CMDLNPRM prm; - /* - * Remove all non-active leases from the vector and zapping the - * config pointers of the once left behind. 
- */ - std::vector<VBoxNetDhcpLease>::iterator Itr = m_Leases.begin(); - while (Itr != m_Leases.end()) - { - if (!Itr->isInUse(&Now)) - Itr = m_Leases.erase(Itr); - else - { - Itr->m_pCfg = NULL; - Itr++; - } - } + /* Ok, we've entered here, thus we can't ignore cmd line parameters anymore */ + m_fIgnoreCmdLineParameters = false; - /* - * Loop thru the configurations in reverse order, giving the last - * configs priority of the newer ones. - */ - size_t iCfg = m_Cfgs.size(); - while (iCfg-- > 0) - { - VBoxNetDhcpCfg *pCfg = m_Cfgs[iCfg]; + prm.Key = rc; - /* Expand the IP lease range. */ - uint32_t const uLast = RT_N2H_U32(pCfg->m_UpperAddr.u); - for (uint32_t i = RT_N2H_U32(pCfg->m_LowerAddr.u); i <= uLast; i++) + switch (rc) + { + case 'l': + case 'u': + case 'g': { - RTNETADDRIPV4 IPv4Addr; - IPv4Addr.u = RT_H2N_U32(i); - - /* Check if it exists and is configured. */ - VBoxNetDhcpLease *pLease = NULL; - for (size_t j = 0; j < m_Leases.size(); j++) - if (m_Leases[j].m_IPv4Address.u == IPv4Addr.u) - { - pLease = &m_Leases[j]; - break; - } - if (pLease) - { - if (!pLease->m_pCfg) - pLease->m_pCfg = pCfg; - } - else - { - /* add it. */ - VBoxNetDhcpLease NewLease(IPv4Addr, pCfg); - m_Leases.push_back(NewLease); - debugPrint(10, false, "exploseConfig: new lease %d.%d.%d.%d", - IPv4Addr.au8[0], IPv4Addr.au8[1], IPv4Addr.au8[2], IPv4Addr.au8[3]); - } + char buf[17]; + RTStrPrintf(buf, 17, "%RTnaipv4", Val.IPv4Addr.u); + prm.strValue = buf; + CmdParameterll.push_back(prm); } - } -} + break; - -/** - * Parse the arguments. - * - * @returns 0 on success, fully bitched exit code on failure. - * - * @param argc Argument count. - * @param argv Argument vector. 
- */ -int VBoxNetDhcp::parseArgs(int argc, char **argv) -{ - static const RTGETOPTDEF s_aOptionDefs[] = - { - { "--name", 'N', RTGETOPT_REQ_STRING }, - { "--network", 'n', RTGETOPT_REQ_STRING }, - { "--trunk-name", 't', RTGETOPT_REQ_STRING }, - { "--trunk-type", 'T', RTGETOPT_REQ_STRING }, - { "--mac-address", 'a', RTGETOPT_REQ_MACADDR }, - { "--ip-address", 'i', RTGETOPT_REQ_IPV4ADDR }, - { "--lease-db", 'D', RTGETOPT_REQ_STRING }, - { "--verbose", 'v', RTGETOPT_REQ_NOTHING }, - - { "--begin-config", 'b', RTGETOPT_REQ_NOTHING }, - { "--gateway", 'g', RTGETOPT_REQ_IPV4ADDR }, - { "--lower-ip", 'l', RTGETOPT_REQ_IPV4ADDR }, - { "--upper-ip", 'u', RTGETOPT_REQ_IPV4ADDR }, - { "--netmask", 'm', RTGETOPT_REQ_IPV4ADDR }, - }; - - RTGETOPTSTATE State; - int rc = RTGetOptInit(&State, argc, argv, &s_aOptionDefs[0], RT_ELEMENTS(s_aOptionDefs), 0, 0 /*fFlags*/); - AssertRCReturn(rc, 49); - - VBoxNetDhcpCfg *pCurCfg = NULL; - for (;;) - { - RTGETOPTUNION Val; - rc = RTGetOpt(&State, &Val); - if (!rc) + case 'b': // ignore + case 'D': // ignore break; - switch (rc) - { - case 'N': - m_Name = Val.psz; - break; - case 'n': - m_Network = Val.psz; - break; - case 't': - m_TrunkName = Val.psz; - break; - case 'T': - if (!strcmp(Val.psz, "none")) - m_enmTrunkType = kIntNetTrunkType_None; - else if (!strcmp(Val.psz, "whatever")) - m_enmTrunkType = kIntNetTrunkType_WhateverNone; - else if (!strcmp(Val.psz, "netflt")) - m_enmTrunkType = kIntNetTrunkType_NetFlt; - else if (!strcmp(Val.psz, "netadp")) - m_enmTrunkType = kIntNetTrunkType_NetAdp; - else if (!strcmp(Val.psz, "srvnat")) - m_enmTrunkType = kIntNetTrunkType_SrvNat; - else - { - RTStrmPrintf(g_pStdErr, "Invalid trunk type '%s'\n", Val.psz); - return 1; - } - break; - case 'a': - m_MacAddress = Val.MacAddr; - break; - case 'i': - m_Ipv4Address = Val.IPv4Addr; - break; - case 'd': - m_LeaseDBName = Val.psz; - break; - case 'v': - m_cVerbosity++; - break; - - /* Begin config. 
*/ - case 'b': - rc = addConfig(pCurCfg); - if (rc) - break; - pCurCfg = NULL; - /* fall thru */ - - /* config specific ones. */ - case 'g': - case 'l': - case 'u': - case 'm': - if (!pCurCfg) - { - pCurCfg = new VBoxNetDhcpCfg(); - if (!pCurCfg) - { - RTStrmPrintf(g_pStdErr, "VBoxNetDHCP: new VBoxDhcpCfg failed\n"); - return 1; - } - } - - switch (rc) - { - case 'g': - pCurCfg->m_Routers.push_back(Val.IPv4Addr); - break; - - case 'l': - pCurCfg->m_LowerAddr = Val.IPv4Addr; - break; - - case 'u': - pCurCfg->m_UpperAddr = Val.IPv4Addr; - break; - - case 'm': - pCurCfg->m_SubnetMask = Val.IPv4Addr; - break; - - case 0: /* ignore */ break; - default: - AssertMsgFailed(("%d", rc)); - return 1; - } - break; - - case 'V': - RTPrintf("%sr%u\n", RTBldCfgVersion(), RTBldCfgRevision()); - return 1; - - case 'h': - RTPrintf("VBoxNetDHCP Version %s\n" - "(C) 2009-" VBOX_C_YEAR " " VBOX_VENDOR "\n" - "All rights reserved.\n" - "\n" - "Usage: VBoxNetDHCP <options>\n" - "\n" - "Options:\n", - RTBldCfgVersion()); - for (size_t i = 0; i < RT_ELEMENTS(s_aOptionDefs); i++) - RTPrintf(" -%c, %s\n", s_aOptionDefs[i].iShort, s_aOptionDefs[i].pszLong); - return 1; - - default: - rc = RTGetOptPrintError(rc, &Val); - RTPrintf("Use --help for more information.\n"); - return rc; - } + default: + rc = RTGetOptPrintError(rc, &Val); + RTPrintf("Use --help for more information.\n"); + return rc; } - /* - * Do the reconfig. (move this later) - */ - if (!rc) - explodeConfig(); - - return rc; + return VINF_SUCCESS; } - -/** - * Tries to connect to the internal network. - * - * @returns 0 on success, exit code + error message to stderr on failure. - */ -int VBoxNetDhcp::tryGoOnline(void) +int VBoxNetDhcp::init() { - /* - * Open the session, load ring-0 and issue the request. 
- */ - int rc = SUPR3Init(&m_pSession); - if (RT_FAILURE(rc)) - { - m_pSession = NIL_RTR0PTR; - RTStrmPrintf(g_pStdErr, "VBoxNetDHCP: SUPR3Init -> %Rrc", rc); - return 1; - } - - char szPath[RTPATH_MAX]; - rc = RTPathExecDir(szPath, sizeof(szPath) - sizeof("/VMMR0.r0")); - if (RT_FAILURE(rc)) - { - RTStrmPrintf(g_pStdErr, "VBoxNetDHCP: RTPathProgram -> %Rrc", rc); - return 1; - } - - rc = SUPR3LoadVMM(strcat(szPath, "/VMMR0.r0")); - if (RT_FAILURE(rc)) - { - RTStrmPrintf(g_pStdErr, "VBoxNetDHCP: SUPR3LoadVMM(\"%s\") -> %Rrc", szPath, rc); - return 1; - } + int rc = this->VBoxNetBaseService::init(); + AssertRCReturn(rc, rc); - /* - * Create the open request. - */ - INTNETOPENREQ OpenReq; - OpenReq.Hdr.u32Magic = SUPVMMR0REQHDR_MAGIC; - OpenReq.Hdr.cbReq = sizeof(OpenReq); - OpenReq.pSession = m_pSession; - strncpy(OpenReq.szNetwork, m_Network.c_str(), sizeof(OpenReq.szNetwork)); - OpenReq.szNetwork[sizeof(OpenReq.szNetwork) - 1] = '\0'; - strncpy(OpenReq.szTrunk, m_TrunkName.c_str(), sizeof(OpenReq.szTrunk)); - OpenReq.szTrunk[sizeof(OpenReq.szTrunk) - 1] = '\0'; - OpenReq.enmTrunkType = m_enmTrunkType; - OpenReq.fFlags = 0; /** @todo check this */ - OpenReq.cbSend = m_cbSendBuf; - OpenReq.cbRecv = m_cbRecvBuf; - OpenReq.hIf = INTNET_HANDLE_INVALID; + NetworkManager *netManager = NetworkManager::getNetworkManager(); - /* - * Issue the request. - */ - debugPrint(2, false, "attempting to open/create network \"%s\"...", OpenReq.szNetwork); - rc = SUPR3CallVMMR0Ex(NIL_RTR0PTR, NIL_VMCPUID, VMMR0_DO_INTNET_OPEN, 0, &OpenReq.Hdr); - if (RT_SUCCESS(rc)) - { - m_hIf = OpenReq.hIf; - debugPrint(1, false, "successfully opened/created \"%s\" - hIf=%#x", OpenReq.szNetwork, m_hIf); - - /* - * Get the ring-3 address of the shared interface buffer. 
- */ - INTNETIFGETBUFFERPTRSREQ GetBufferPtrsReq; - GetBufferPtrsReq.Hdr.u32Magic = SUPVMMR0REQHDR_MAGIC; - GetBufferPtrsReq.Hdr.cbReq = sizeof(GetBufferPtrsReq); - GetBufferPtrsReq.pSession = m_pSession; - GetBufferPtrsReq.hIf = m_hIf; - GetBufferPtrsReq.pRing3Buf = NULL; - GetBufferPtrsReq.pRing0Buf = NIL_RTR0PTR; - rc = SUPR3CallVMMR0Ex(NIL_RTR0PTR, NIL_VMCPUID, VMMR0_DO_INTNET_IF_GET_BUFFER_PTRS, 0, &GetBufferPtrsReq.Hdr); - if (RT_SUCCESS(rc)) - { - PINTNETBUF pBuf = GetBufferPtrsReq.pRing3Buf; - debugPrint(1, false, "pBuf=%p cbBuf=%d cbSend=%d cbRecv=%d", - pBuf, pBuf->cbBuf, pBuf->cbSend, pBuf->cbRecv); - m_pIfBuf = pBuf; + netManager->setOurAddress(getIpv4Address()); + netManager->setOurNetmask(getIpv4Netmask()); + netManager->setOurMac(getMacAddress()); + netManager->setService(this); - /* - * Activate the interface. - */ - INTNETIFSETACTIVEREQ ActiveReq; - ActiveReq.Hdr.u32Magic = SUPVMMR0REQHDR_MAGIC; - ActiveReq.Hdr.cbReq = sizeof(ActiveReq); - ActiveReq.pSession = m_pSession; - ActiveReq.hIf = m_hIf; - ActiveReq.fActive = true; - rc = SUPR3CallVMMR0Ex(NIL_RTR0PTR, NIL_VMCPUID, VMMR0_DO_INTNET_IF_SET_ACTIVE, 0, &ActiveReq.Hdr); - if (RT_SUCCESS(rc)) - return 0; - - /* bail out */ - RTStrmPrintf(g_pStdErr, "VBoxNetDHCP: SUPR3CallVMMR0Ex(,VMMR0_DO_INTNET_IF_SET_PROMISCUOUS_MODE,) failed, rc=%Rrc\n", rc); - } - else - RTStrmPrintf(g_pStdErr, "VBoxNetDHCP: SUPR3CallVMMR0Ex(,VMMR0_DO_INTNET_IF_GET_BUFFER_PTRS,) failed, rc=%Rrc\n", rc); - } + if (isMainNeeded()) + rc = initWithMain(); else - RTStrmPrintf(g_pStdErr, "VBoxNetDHCP: SUPR3CallVMMR0Ex(,VMMR0_DO_INTNET_OPEN,) failed, rc=%Rrc\n", rc); + rc = initNoMain(); + + AssertRCReturn(rc, rc); - return RT_SUCCESS(rc) ? 0 : 1; + return VINF_SUCCESS; } -/** - * Runs the DHCP server. - * - * @returns exit code + error message to stderr on failure, won't return on - * success (you must kill this process). - */ -int VBoxNetDhcp::run(void) +int VBoxNetDhcp::processUDP(void *pv, size_t cbPv) { - /* - * The loop. 
- */ - PINTNETRINGBUF pRingBuf = &m_pIfBuf->Recv; - for (;;) - { - /* - * Wait for a packet to become available. - */ - INTNETIFWAITREQ WaitReq; - WaitReq.Hdr.u32Magic = SUPVMMR0REQHDR_MAGIC; - WaitReq.Hdr.cbReq = sizeof(WaitReq); - WaitReq.pSession = m_pSession; - WaitReq.hIf = m_hIf; - WaitReq.cMillies = 2000; /* 2 secs - the sleep is for some reason uninterruptible... */ /** @todo fix interruptability in SrvIntNet! */ - int rc = SUPR3CallVMMR0Ex(NIL_RTR0PTR, NIL_VMCPUID, VMMR0_DO_INTNET_IF_WAIT, 0, &WaitReq.Hdr); - if (RT_FAILURE(rc)) - { - if (rc == VERR_TIMEOUT || rc == VERR_INTERRUPTED) - continue; - RTStrmPrintf(g_pStdErr, "VBoxNetDHCP: VMMR0_DO_INTNET_IF_WAIT returned %Rrc\n", rc); - return 1; - } + PCRTNETBOOTP pDhcpMsg = (PCRTNETBOOTP)pv; + m_pCurMsg = pDhcpMsg; + m_cbCurMsg = cbPv; - /* - * Process the receive buffer. - */ - while (IntNetRingHasMoreToRead(pRingBuf)) + uint8_t uMsgType; + if (RTNetIPv4IsDHCPValid(NULL /* why is this here? */, pDhcpMsg, cbPv, &uMsgType)) + { + m_uCurMsgType = uMsgType; { - size_t cb; - void *pv = VBoxNetUDPMatch(m_pIfBuf, RTNETIPV4_PORT_BOOTPS, &m_MacAddress, - VBOXNETUDP_MATCH_UNICAST | VBOXNETUDP_MATCH_BROADCAST | VBOXNETUDP_MATCH_CHECKSUM - | (m_cVerbosity > 2 ? VBOXNETUDP_MATCH_PRINT_STDERR : 0), - &m_CurHdrs, &cb); - if (pv && cb) - { - PCRTNETBOOTP pDhcpMsg = (PCRTNETBOOTP)pv; - m_pCurMsg = pDhcpMsg; - m_cbCurMsg = cb; - - uint8_t uMsgType; - if (RTNetIPv4IsDHCPValid(NULL /* why is this here? */, pDhcpMsg, cb, &uMsgType)) - { - m_uCurMsgType = uMsgType; - handleDhcpMsg(uMsgType, pDhcpMsg, cb); - m_uCurMsgType = UINT8_MAX; - } - else - debugPrint(1, true, "VBoxNetDHCP: Skipping invalid DHCP packet.\n"); /** @todo handle pure bootp clients too? */ - - m_pCurMsg = NULL; - m_cbCurMsg = 0; - } - else if (VBoxNetArpHandleIt(m_pSession, m_hIf, m_pIfBuf, &m_MacAddress, m_Ipv4Address)) - { - /* nothing */ - } - - /* Advance to the next frame. 
*/ - IntNetRingSkipFrame(pRingBuf); + /* To avoid fight with event processing thread */ + VBoxNetALock(this); + handleDhcpMsg(uMsgType, pDhcpMsg, cbPv); } + m_uCurMsgType = UINT8_MAX; } + else + debugPrint(1, true, "VBoxNetDHCP: Skipping invalid DHCP packet.\n"); /** @todo handle pure bootp clients too? */ - return 0; + m_pCurMsg = NULL; + m_cbCurMsg = 0; + + return VINF_SUCCESS; } @@ -1036,19 +341,21 @@ bool VBoxNetDhcp::handleDhcpMsg(uint8_t uMsgType, PCRTNETBOOTP pDhcpMsg, size_t { if (pDhcpMsg->bp_op == RTNETBOOTP_OP_REQUEST) { + NetworkManager *networkManager = NetworkManager::getNetworkManager(); + switch (uMsgType) { case RTNET_DHCP_MT_DISCOVER: - return handleDhcpReqDiscover(pDhcpMsg, cb); + return networkManager->handleDhcpReqDiscover(pDhcpMsg, cb); case RTNET_DHCP_MT_REQUEST: - return handleDhcpReqRequest(pDhcpMsg, cb); + return networkManager->handleDhcpReqRequest(pDhcpMsg, cb); case RTNET_DHCP_MT_DECLINE: - return handleDhcpReqDecline(pDhcpMsg, cb); + return networkManager->handleDhcpReqDecline(pDhcpMsg, cb); case RTNET_DHCP_MT_RELEASE: - return handleDhcpReqRelease(pDhcpMsg, cb); + return networkManager->handleDhcpReqRelease(pDhcpMsg, cb); case RTNET_DHCP_MT_INFORM: debugPrint(0, true, "Should we handle this?"); @@ -1062,946 +369,245 @@ bool VBoxNetDhcp::handleDhcpMsg(uint8_t uMsgType, PCRTNETBOOTP pDhcpMsg, size_t return false; } - /** - * The client is requesting an offer. - * - * @returns true. - * - * @param pDhcpMsg The message. - * @param cb The message size. - */ -bool VBoxNetDhcp::handleDhcpReqDiscover(PCRTNETBOOTP pDhcpMsg, size_t cb) -{ - /* - * The newLease() method contains logic for finding current leases - * and reusing them in case the client is forgetful. 
- */ - VBoxNetDhcpLease *pLease = newLease(pDhcpMsg, cb); - if (!pLease) - return false; - debugPrint(1, true, "Offering %d.%d.%d.%d to %.6Rhxs xid=%#x", - pLease->m_IPv4Address.au8[0], - pLease->m_IPv4Address.au8[1], - pLease->m_IPv4Address.au8[2], - pLease->m_IPv4Address.au8[3], - &pDhcpMsg->bp_chaddr.Mac, - pDhcpMsg->bp_xid); - pLease->offer(pDhcpMsg->bp_xid); - - makeDhcpReply(RTNET_DHCP_MT_OFFER, pLease, pDhcpMsg, cb); - return true; -} - - -/** - * The client is requesting an offer. - * - * @returns true. + * Print debug message depending on the m_cVerbosity level. * - * @param pDhcpMsg The message. - * @param cb The message size. + * @param iMinLevel The minimum m_cVerbosity level for this message. + * @param fMsg Whether to dump parts for the current DHCP message. + * @param pszFmt The message format string. + * @param va Optional arguments. */ -bool VBoxNetDhcp::handleDhcpReqRequest(PCRTNETBOOTP pDhcpMsg, size_t cb) +void VBoxNetDhcp::debugPrintV(int iMinLevel, bool fMsg, const char *pszFmt, va_list va) const { - /** @todo Probably need to match the server IP here to work correctly with - * other servers. */ - /** @todo This code isn't entirely correct and quite a bit of a hack, but it - * will have to do for now as the right thing (tm) is very complex. - * Part of the fun is verifying that the request is something we can - * and should handle. */ - - /* - * Try find the lease by the requested address + client MAC address. - */ - VBoxNetDhcpLease *pLease = NULL; - RTNETADDRIPV4 IPv4Addr; - bool fReqAddr = findOptionIPv4Addr(RTNET_DHCP_OPT_REQ_ADDR, pDhcpMsg, cb, &IPv4Addr); - if (fReqAddr) - { - fReqAddr = true; - pLease = findLeaseByIpv4AndMacAddresses(IPv4Addr, &pDhcpMsg->bp_chaddr.Mac, true /* fAnyState */); - } - - /* - * Try find the lease by the client IP address + client MAC address. 
- */ - if ( !pLease - && pDhcpMsg->bp_ciaddr.u) - pLease = findLeaseByIpv4AndMacAddresses(pDhcpMsg->bp_ciaddr, &pDhcpMsg->bp_chaddr.Mac, true /* fAnyState */); - -#if 0 /** @todo client id stuff - it doesn't make sense here imho, we need IP + MAC. What would make sense - though is to compare the client id with what we've got in the lease and use it to root out - bad requests. */ - /* - * Try find the lease by using the client id. - */ - if (!pLease) + if (iMinLevel <= m_cVerbosity) { - size_t cbClientID = 0; - uint8_t const *pbClientID = findOption(RTNET_DHCP_OPT_CLIENT_ID, pDhcpMsg, cb, &cbClientID); - if ( pbClientID - && cbClientID == sizeof(RTMAC) + 1 - && pbClientID[0] == RTNET_ARP_ETHER - && - ) - { - pLease = findLeaseByIpv4AndMacAddresses(pDhcpMsg->bp_ciaddr, &pDhcpMsg->bp_chaddr.Mac, true /* fAnyState */); - } - } -#endif + va_list vaCopy; /* This dude is *very* special, thus the copy. */ + va_copy(vaCopy, va); + RTStrmPrintf(g_pStdErr, "VBoxNetDHCP: %s: %N\n", iMinLevel >= 2 ? "debug" : "info", pszFmt, &vaCopy); + va_end(vaCopy); - /* - * Validate the lease that's requested. - * We've already check the MAC and IP addresses. - */ - bool fAckIt = false; - if (pLease) - { - if (pLease->isBeingOffered()) - { - if (pLease->m_xid == pDhcpMsg->bp_xid) - debugPrint(2, true, "REQUEST for offered lease."); - else - debugPrint(2, true, "REQUEST for offered lease, xid mismatch. Expected %#x, got %#x.", - pLease->m_xid, pDhcpMsg->bp_xid); - pLease->activate(pDhcpMsg->bp_xid); - fAckIt = true; - } - else if (!pLease->isInCurrentConfig()) - debugPrint(1, true, "REQUEST for obsolete lease -> NAK"); - else if (fReqAddr != (pDhcpMsg->bp_ciaddr.u != 0)) // ??? + if ( fMsg + && m_cVerbosity >= 2 + && m_pCurMsg) { - /** @todo this ain't safe. */ - debugPrint(1, true, "REQUEST for lease not on offer, assuming renewal. 
lease_xid=%#x bp_xid=%#x", - pLease->m_xid, pDhcpMsg->bp_xid); - fAckIt = true; - pLease->activate(pDhcpMsg->bp_xid); + /* XXX: export this to debugPrinfDhcpMsg or variant and other method export + * to base class + */ + const char *pszMsg = m_uCurMsgType != UINT8_MAX ? debugDhcpName(m_uCurMsgType) : ""; + RTStrmPrintf(g_pStdErr, "VBoxNetDHCP: debug: %8s chaddr=%.6Rhxs ciaddr=%d.%d.%d.%d yiaddr=%d.%d.%d.%d siaddr=%d.%d.%d.%d xid=%#x\n", + pszMsg, + &m_pCurMsg->bp_chaddr, + m_pCurMsg->bp_ciaddr.au8[0], m_pCurMsg->bp_ciaddr.au8[1], m_pCurMsg->bp_ciaddr.au8[2], m_pCurMsg->bp_ciaddr.au8[3], + m_pCurMsg->bp_yiaddr.au8[0], m_pCurMsg->bp_yiaddr.au8[1], m_pCurMsg->bp_yiaddr.au8[2], m_pCurMsg->bp_yiaddr.au8[3], + m_pCurMsg->bp_siaddr.au8[0], m_pCurMsg->bp_siaddr.au8[1], m_pCurMsg->bp_siaddr.au8[2], m_pCurMsg->bp_siaddr.au8[3], + m_pCurMsg->bp_xid); } - else - debugPrint(1, true, "REQUEST for lease not on offer, NAK it."); - } - - /* - * NAK if if no lease was found. - */ - if (fAckIt) - { - debugPrint(1, false, "ACK'ing DHCP_REQUEST"); - makeDhcpReply(RTNET_DHCP_MT_ACK, pLease, pDhcpMsg, cb); - } - else - { - debugPrint(1, false, "NAK'ing DHCP_REQUEST"); - makeDhcpReply(RTNET_DHCP_MT_NAC, NULL, pDhcpMsg, cb); } - - return true; } /** - * The client is declining an offer we've made. - * - * @returns true. - * - * @param pDhcpMsg The message. - * @param cb The message size. - */ -bool VBoxNetDhcp::handleDhcpReqDecline(PCRTNETBOOTP pDhcpMsg, size_t cb) -{ - /** @todo Probably need to match the server IP here to work correctly with - * other servers. */ - - /* - * The client is supposed to pass us option 50, requested address, - * from the offer. We also match the lease state. Apparently the - * MAC address is not supposed to be checked here. - */ - - /** @todo this is not required in the initial implementation, do it later. */ - debugPrint(1, true, "DECLINE is not implemented"); - return true; -} - - -/** - * The client is releasing its lease - good boy. - * - * @returns true. 
+ * Gets the name of given DHCP message type. * - * @param pDhcpMsg The message. - * @param cb The message size. - */ -bool VBoxNetDhcp::handleDhcpReqRelease(PCRTNETBOOTP pDhcpMsg, size_t cb) -{ - /** @todo Probably need to match the server IP here to work correctly with - * other servers. */ - - /* - * The client may pass us option 61, client identifier, which we should - * use to find the lease by. - * - * We're matching MAC address and lease state as well. - */ - - /* - * If no client identifier or if we couldn't find a lease by using it, - * we will try look it up by the client IP address. - */ - - - /* - * If found, release it. - */ - - - /** @todo this is not required in the initial implementation, do it later. */ - debugPrint(1, true, "RELEASE is not implemented"); - return true; -} - - -/** - * Helper class for stuffing DHCP options into a reply packet. + * @returns Readonly name. + * @param uMsgType The message number. */ -class VBoxNetDhcpWriteCursor +/* static */ const char *VBoxNetDhcp::debugDhcpName(uint8_t uMsgType) { -private: - uint8_t *m_pbCur; /**< The current cursor position. */ - uint8_t *m_pbEnd; /**< The end the current option space. */ - uint8_t *m_pfOverload; /**< Pointer to the flags of the overload option. */ - uint8_t m_fUsed; /**< Overload fields that have been used. */ - PRTNETDHCPOPT m_pOpt; /**< The current option. */ - PRTNETBOOTP m_pDhcp; /**< The DHCP packet. */ - bool m_fOverflowed; /**< Set if we've overflowed, otherwise false. */ - -public: - /** Instantiate an option cursor for the specified DHCP message. */ - VBoxNetDhcpWriteCursor(PRTNETBOOTP pDhcp, size_t cbDhcp) : - m_pbCur(&pDhcp->bp_vend.Dhcp.dhcp_opts[0]), - m_pbEnd((uint8_t *)pDhcp + cbDhcp), - m_pfOverload(NULL), - m_fUsed(0), - m_pOpt(NULL), - m_pDhcp(pDhcp), - m_fOverflowed(false) - { - AssertPtr(pDhcp); - Assert(cbDhcp > RT_UOFFSETOF(RTNETBOOTP, bp_vend.Dhcp.dhcp_opts[10])); - } - - /** Destructor. 
*/ - ~VBoxNetDhcpWriteCursor() - { - m_pbCur = m_pbEnd = m_pfOverload = NULL; - m_pOpt = NULL; - m_pDhcp = NULL; - } - - /** - * Try use the bp_file field. - * @returns true if not overloaded, false otherwise. - */ - bool useBpFile(void) + switch (uMsgType) { - if ( m_pfOverload - && (*m_pfOverload & 1)) - return false; - m_fUsed |= 1 /* bp_file flag*/; - return true; + case 0: return "MT_00"; + case RTNET_DHCP_MT_DISCOVER: return "DISCOVER"; + case RTNET_DHCP_MT_OFFER: return "OFFER"; + case RTNET_DHCP_MT_REQUEST: return "REQUEST"; + case RTNET_DHCP_MT_DECLINE: return "DECLINE"; + case RTNET_DHCP_MT_ACK: return "ACK"; + case RTNET_DHCP_MT_NAC: return "NAC"; + case RTNET_DHCP_MT_RELEASE: return "RELEASE"; + case RTNET_DHCP_MT_INFORM: return "INFORM"; + case 9: return "MT_09"; + case 10: return "MT_0a"; + case 11: return "MT_0b"; + case 12: return "MT_0c"; + case 13: return "MT_0d"; + case 14: return "MT_0e"; + case 15: return "MT_0f"; + case 16: return "MT_10"; + case 17: return "MT_11"; + case 18: return "MT_12"; + case 19: return "MT_13"; + case UINT8_MAX: return "MT_ff"; + default: return "UNKNOWN"; } +} - /** - * Try overload more BOOTP fields - */ - bool overloadMore(void) - { - /* switch option area. */ - uint8_t *pbNew; - uint8_t *pbNewEnd; - uint8_t fField; - if (!(m_fUsed & 1)) - { - fField = 1; - pbNew = &m_pDhcp->bp_file[0]; - pbNewEnd = &m_pDhcp->bp_file[sizeof(m_pDhcp->bp_file)]; - } - else if (!(m_fUsed & 2)) - { - fField = 2; - pbNew = &m_pDhcp->bp_sname[0]; - pbNewEnd = &m_pDhcp->bp_sname[sizeof(m_pDhcp->bp_sname)]; - } - else - return false; +int VBoxNetDhcp::initNoMain() +{ + CmdParameterIterator it; - if (!m_pfOverload) - { - /* Add an overload option. 
*/ - *m_pbCur++ = RTNET_DHCP_OPT_OPTION_OVERLOAD; - *m_pbCur++ = fField; - m_pfOverload = m_pbCur; - *m_pbCur++ = 1; /* bp_file flag */ - } - else - *m_pfOverload |= fField; + RTNETADDRIPV4 address = getIpv4Address(); + RTNETADDRIPV4 netmask = getIpv4Netmask(); + RTNETADDRIPV4 networkId; + networkId.u = address.u & netmask.u; - /* pad current option field */ - while (m_pbCur != m_pbEnd) - *m_pbCur++ = RTNET_DHCP_OPT_PAD; /** @todo not sure if this stuff is at all correct... */ + RTNETADDRIPV4 UpperAddress; + RTNETADDRIPV4 LowerAddress = networkId; + UpperAddress.u = RT_H2N_U32(RT_N2H_U32(LowerAddress.u) | RT_N2H_U32(netmask.u)); - /* switch */ - m_pbCur = pbNew; - m_pbEnd = pbNewEnd; - return true; - } - - /** - * Begin an option. - * - * @returns true on success, false if we're out of space. - * - * @param uOption The option number. - * @param cb The amount of data. - */ - bool begin(uint8_t uOption, size_t cb) + for (it = CmdParameterll.begin(); it != CmdParameterll.end(); ++it) { - /* Check that the data of the previous option has all been written. */ - Assert( !m_pOpt - || (m_pbCur - m_pOpt->dhcp_len == (uint8_t *)(m_pOpt + 1))); - AssertMsg(cb <= 255, ("%#x\n", cb)); - - /* Check if we need to overload more stuff. */ - if ((uintptr_t)(m_pbEnd - m_pbCur) < cb + 2 + (m_pfOverload ? 1 : 3)) + switch(it->Key) { - m_pOpt = NULL; - if (!overloadMore()) - { - m_fOverflowed = true; - AssertMsgFailedReturn(("%u %#x\n", uOption, cb), false); - } - if ((uintptr_t)(m_pbEnd - m_pbCur) < cb + 2 + 1) - { - m_fOverflowed = true; - AssertMsgFailedReturn(("%u %#x\n", uOption, cb), false); - } - } + case 'l': + RTNetStrToIPv4Addr(it->strValue.c_str(), &LowerAddress); + break; - /* Emit the option header. */ - m_pOpt = (PRTNETDHCPOPT)m_pbCur; - m_pOpt->dhcp_opt = uOption; - m_pOpt->dhcp_len = (uint8_t)cb; - m_pbCur += 2; - return true; - } + case 'u': + RTNetStrToIPv4Addr(it->strValue.c_str(), &UpperAddress); + break; + case 'b': + break; - /** - * Puts option data. 
- * - * @param pvData The data. - * @param cb The amount to put. - */ - void put(void const *pvData, size_t cb) - { - Assert(m_pOpt || m_fOverflowed); - if (RT_LIKELY(m_pOpt)) - { - Assert((uintptr_t)m_pbCur - (uintptr_t)(m_pOpt + 1) + cb <= (size_t)m_pOpt->dhcp_len); - memcpy(m_pbCur, pvData, cb); - m_pbCur += cb; } } - /** - * Puts an IPv4 Address. - * - * @param IPv4Addr The address. - */ - void putIPv4Addr(RTNETADDRIPV4 IPv4Addr) - { - put(&IPv4Addr, 4); - } + ConfigurationManager *confManager = ConfigurationManager::getConfigurationManager(); + AssertPtrReturn(confManager, VERR_INTERNAL_ERROR); + confManager->addNetwork(unconst(g_RootConfig), + networkId, + netmask, + LowerAddress, + UpperAddress); - /** - * Adds an IPv4 address option. - * - * @returns true/false just like begin(). - * - * @param uOption The option number. - * @param IPv4Addr The address. - */ - bool optIPv4Addr(uint8_t uOption, RTNETADDRIPV4 IPv4Addr) - { - if (!begin(uOption, 4)) - return false; - putIPv4Addr(IPv4Addr); - return true; - } + return VINF_SUCCESS; +} - /** - * Adds an option taking 1 or more IPv4 address. - * - * If the vector contains no addresses, the option will not be added. - * - * @returns true/false just like begin(). - * - * @param uOption The option number. - * @param rIPv4Addrs Reference to the address vector. - */ - bool optIPv4Addrs(uint8_t uOption, std::vector<RTNETADDRIPV4> const &rIPv4Addrs) - { - size_t const c = rIPv4Addrs.size(); - if (!c) - return true; - - if (!begin(uOption, 4*c)) - return false; - for (size_t i = 0; i < c; i++) - putIPv4Addr(rIPv4Addrs[i]); - return true; - } - /** - * Puts an 8-bit integer. - * - * @param u8 The integer. 
+int VBoxNetDhcp::initWithMain() +{ + /* ok, here we should initiate instance of dhcp server + * and listener for Dhcp configuration events */ - void putU8(uint8_t u8) - { - put(&u8, 1); - } + AssertRCReturn(virtualbox.isNull(), VERR_INTERNAL_ERROR); + std::string networkName = getNetwork(); - /** - * Adds an 8-bit integer option. - * - * @returns true/false just like begin(). - * - * @param uOption The option number. - * @param u8 The integer - */ - bool optU8(uint8_t uOption, uint8_t u8) - { - if (!begin(uOption, 1)) - return false; - putU8(u8); - return true; - } + int rc = findDhcpServer(virtualbox, networkName, m_DhcpServer); + AssertRCReturn(rc, rc); - /** - * Puts an 32-bit integer (network endian). - * - * @param u32Network The integer. - */ - void putU32(uint32_t u32) - { - put(&u32, 4); - } + rc = findNatNetwork(virtualbox, networkName, m_NATNetwork); + AssertRCReturn(rc, rc); - /** - * Adds an 32-bit integer (network endian) option. - * - * @returns true/false just like begin(). - * - * @param uOption The option number. - * @param u32Network The integer. - */ - bool optU32(uint8_t uOption, uint32_t u32) - { - if (!begin(uOption, 4)) - return false; - putU32(u32); - return true; - } + BOOL fNeedDhcpServer = isDhcpRequired(m_NATNetwork); + if (!fNeedDhcpServer) + return VERR_CANCELLED; - /** - * Puts a std::string. - * - * @param rStr Reference to the string. - */ - void putStr(std::string const &rStr) - { - put(rStr.c_str(), rStr.size()); - } + RTNETADDRIPV4 gateway; + com::Bstr strGateway; + HRESULT hrc = m_NATNetwork->COMGETTER(Gateway)(strGateway.asOutParam()); + AssertComRCReturn(hrc, VERR_INTERNAL_ERROR); + RTNetStrToIPv4Addr(com::Utf8Str(strGateway).c_str(), &gateway); - /** - * Adds an std::string option if the string isn't empty. - * - * @returns true/false just like begin(). - * - * @param uOption The option number. - * @param rStr Reference to the string. 
- */ - bool optStr(uint8_t uOption, std::string const &rStr) - { - const size_t cch = rStr.size(); - if (!cch) - return true; - - if (!begin(uOption, cch)) - return false; - put(rStr.c_str(), cch); - return true; - } + ConfigurationManager *confManager = ConfigurationManager::getConfigurationManager(); + AssertPtrReturn(confManager, VERR_INTERNAL_ERROR); + confManager->addToAddressList(RTNET_DHCP_OPT_ROUTERS, gateway); - /** - * Whether we've overflowed. - * - * @returns true on overflow, false otherwise. - */ - bool hasOverflowed(void) const - { - return m_fOverflowed; - } + rc = fetchAndUpdateDnsInfo(); + AssertMsgRCReturn(rc, ("Wasn't able to fetch Dns info"), rc); - /** - * Adds the terminating END option. - * - * The END will always be added as we're reserving room for it, however, we - * might have dropped previous options due to overflows and that is what the - * return status indicates. - * - * @returns true on success, false on a (previous) overflow. - */ - bool optEnd(void) - { - Assert((uintptr_t)(m_pbEnd - m_pbCur) < 4096); - *m_pbCur++ = RTNET_DHCP_OPT_END; - return !hasOverflowed(); - } -}; + ComEventTypeArray aVBoxEvents; + aVBoxEvents.push_back(VBoxEventType_OnHostNameResolutionConfigurationChange); + rc = createNatListener(m_vboxListener, virtualbox, this, aVBoxEvents); + AssertRCReturn(rc, rc); + RTNETADDRIPV4 LowerAddress; + rc = configGetBoundryAddress(m_DhcpServer, false, LowerAddress); + AssertMsgRCReturn(rc, ("can't get lower boundrary adderss'"),rc); -/** - * Constructs and sends a reply to a client. - * - * @returns - * @param uMsgType The DHCP message type. - * @param pLease The lease. This can be NULL for some replies. - * @param pDhcpMsg The client message. We will dig out the MAC address, - * transaction ID, and requested options from this. - * @param cb The size of the client message. 
- */ -void VBoxNetDhcp::makeDhcpReply(uint8_t uMsgType, VBoxNetDhcpLease *pLease, PCRTNETBOOTP pDhcpMsg, size_t cb) -{ - size_t cbReply = RTNET_DHCP_NORMAL_SIZE; /** @todo respect the RTNET_DHCP_OPT_MAX_DHCP_MSG_SIZE option */ - PRTNETBOOTP pReply = (PRTNETBOOTP)alloca(cbReply); + RTNETADDRIPV4 UpperAddress; + rc = configGetBoundryAddress(m_DhcpServer, true, UpperAddress); + AssertMsgRCReturn(rc, ("can't get upper boundrary adderss'"),rc); - /* - * The fixed bits stuff. - */ - pReply->bp_op = RTNETBOOTP_OP_REPLY; - pReply->bp_htype = RTNET_ARP_ETHER; - pReply->bp_hlen = sizeof(RTMAC); - pReply->bp_hops = 0; - pReply->bp_xid = pDhcpMsg->bp_xid; - pReply->bp_secs = 0; - pReply->bp_flags = 0; // (pDhcpMsg->bp_flags & RTNET_DHCP_FLAGS_NO_BROADCAST); ?? - pReply->bp_ciaddr.u = 0; - pReply->bp_yiaddr.u = pLease ? pLease->m_IPv4Address.u : 0xffffffff; - pReply->bp_siaddr.u = pLease && pLease->m_pCfg ? pLease->m_pCfg->m_TftpServerAddr.u : 0; /* (next server == TFTP)*/ - pReply->bp_giaddr.u = 0; - memset(&pReply->bp_chaddr, '\0', sizeof(pReply->bp_chaddr)); - pReply->bp_chaddr.Mac = pDhcpMsg->bp_chaddr.Mac; - memset(&pReply->bp_sname[0], '\0', sizeof(pReply->bp_sname)); - memset(&pReply->bp_file[0], '\0', sizeof(pReply->bp_file)); - pReply->bp_vend.Dhcp.dhcp_cookie = RT_H2N_U32_C(RTNET_DHCP_COOKIE); - memset(&pReply->bp_vend.Dhcp.dhcp_opts[0], '\0', RTNET_DHCP_OPT_SIZE); + RTNETADDRIPV4 address = getIpv4Address(); + RTNETADDRIPV4 netmask = getIpv4Netmask(); + RTNETADDRIPV4 networkId = networkid(address, netmask); + std::string name = std::string("default"); - /* - * The options - use a cursor class for dealing with the ugly stuff. 
- */ - VBoxNetDhcpWriteCursor Cursor(pReply, cbReply); + confManager->addNetwork(unconst(g_RootConfig), + networkId, + netmask, + LowerAddress, + UpperAddress); - /* The basics */ - Cursor.optU8(RTNET_DHCP_OPT_MSG_TYPE, uMsgType); - Cursor.optIPv4Addr(RTNET_DHCP_OPT_SERVER_ID, m_Ipv4Address); + com::Bstr bstr; + hrc = virtualbox->COMGETTER(HomeFolder)(bstr.asOutParam()); + com::Utf8StrFmt strXmlLeaseFile("%ls%c%s.leases", + bstr.raw(), RTPATH_DELIMITER, networkName.c_str()); + confManager->loadFromFile(strXmlLeaseFile); - if (uMsgType != RTNET_DHCP_MT_NAC) - { - AssertReturnVoid(pLease && pLease->m_pCfg); - const VBoxNetDhcpCfg *pCfg = pLease->m_pCfg; /* no need to retain it. */ - - /* The IP config. */ - Cursor.optU32(RTNET_DHCP_OPT_LEASE_TIME, RT_H2N_U32(pCfg->m_cSecLease)); - Cursor.optIPv4Addr(RTNET_DHCP_OPT_SUBNET_MASK, pCfg->m_SubnetMask); - Cursor.optIPv4Addrs(RTNET_DHCP_OPT_ROUTERS, pCfg->m_Routers); - Cursor.optIPv4Addrs(RTNET_DHCP_OPT_ROUTERS, pCfg->m_DNSes); - Cursor.optStr(RTNET_DHCP_OPT_HOST_NAME, pCfg->m_HostName); - Cursor.optStr(RTNET_DHCP_OPT_DOMAIN_NAME, pCfg->m_DomainName); - - /* The PXE config. */ - if (pCfg->m_BootfileName.size()) - { - if (Cursor.useBpFile()) - RTStrPrintf((char *)&pReply->bp_file[0], sizeof(pReply->bp_file), "%s", pCfg->m_BootfileName.c_str()); - else - Cursor.optStr(RTNET_DHCP_OPT_BOOTFILE_NAME, pCfg->m_BootfileName); - } - } - - /* Terminate the options. */ - if (!Cursor.optEnd()) - debugPrint(0, true, "option overflow\n"); - - /* - * Send it. - */ - int rc; -#if 0 - if (!(pDhcpMsg->bp_flags & RTNET_DHCP_FLAGS_NO_BROADCAST)) /** @todo need to see someone set this flag to check that it's correct. 
*/ - { - RTNETADDRIPV4 IPv4AddrBrdCast; - IPv4AddrBrdCast.u = UINT32_C(0xffffffff); /* broadcast IP */ - rc = VBoxNetUDPUnicast(m_pSession, m_hIf, m_pIfBuf, - m_Ipv4Address, &m_MacAddress, RTNETIPV4_PORT_BOOTPS, /* sender */ - IPv4AddrBrdCast, &pDhcpMsg->bp_chaddr.Mac, RTNETIPV4_PORT_BOOTPC, /* receiver */ - pReply, cbReply); - } - else -#endif - rc = VBoxNetUDPBroadcast(m_pSession, m_hIf, m_pIfBuf, - m_Ipv4Address, &m_MacAddress, RTNETIPV4_PORT_BOOTPS, /* sender */ - RTNETIPV4_PORT_BOOTPC, /* receiver port */ - pReply, cbReply); - if (RT_FAILURE(rc)) - debugPrint(0, true, "error %Rrc when sending the reply", rc); + return VINF_SUCCESS; } -/** - * Look up a lease by MAC address. - * - * @returns Pointer to the lease if found, NULL if not found. - * @param pMacAddress The mac address. - * @param fAnyState Any state. - */ -VBoxNetDhcpLease *VBoxNetDhcp::findLeaseByMacAddress(PCRTMAC pMacAddress, bool fAnyState) +int VBoxNetDhcp::fetchAndUpdateDnsInfo() { - size_t iLease = m_Leases.size(); - while (iLease-- > 0) + ComHostPtr host; + if (SUCCEEDED(virtualbox->COMGETTER(Host)(host.asOutParam()))) { - VBoxNetDhcpLease *pLease = &m_Leases[iLease]; - if ( pLease - && pLease->m_MacAddress.au16[0] == pMacAddress->au16[0] - && pLease->m_MacAddress.au16[1] == pMacAddress->au16[1] - && pLease->m_MacAddress.au16[2] == pMacAddress->au16[2] - && ( fAnyState - || (pLease->m_enmState != VBoxNetDhcpLease::kState_Free)) ) - return pLease; - } + AddressToOffsetMapping mapIp4Addr2Off; + int rc = localMappings(m_NATNetwork, mapIp4Addr2Off); + /* XXX: here could be several cases: 1. COM error, 2. not found (empty) 3. ? 
*/ + AssertMsgRCReturn(rc, ("Can't fetch local mappings"), rc); - return NULL; -} + RTNETADDRIPV4 address = getIpv4Address(); + RTNETADDRIPV4 netmask = getIpv4Netmask(); + AddressList nameservers; + rc = hostDnsServers(host, networkid(address, netmask), mapIp4Addr2Off, nameservers); + AssertMsgRCReturn(rc, ("Debug me!!!"), rc); + /* XXX: Search strings */ -/** - * Look up a lease by IPv4 and MAC addresses. - * - * @returns Pointer to the lease if found, NULL if not found. - * @param IPv4Addr The IPv4 address. - * @param pMacAddress The mac address. - * @param fAnyState Any state. - */ -VBoxNetDhcpLease *VBoxNetDhcp::findLeaseByIpv4AndMacAddresses(RTNETADDRIPV4 IPv4Addr, PCRTMAC pMacAddress, bool fAnyState) -{ - size_t iLease = m_Leases.size(); - while (iLease-- > 0) - { - VBoxNetDhcpLease *pLease = &m_Leases[iLease]; - if ( pLease - && pLease->m_IPv4Address.u == IPv4Addr.u - && pLease->m_MacAddress.au16[0] == pMacAddress->au16[0] - && pLease->m_MacAddress.au16[1] == pMacAddress->au16[1] - && pLease->m_MacAddress.au16[2] == pMacAddress->au16[2] - && ( fAnyState - || (pLease->m_enmState != VBoxNetDhcpLease::kState_Free)) ) - return pLease; - } - - return NULL; -} - + std::string domain; + rc = hostDnsDomain(host, domain); + AssertMsgRCReturn(rc, ("Debug me!!"), rc); -/** - * Creates a new lease for the client specified in the DHCP message. - * - * The caller has already made sure it doesn't already have a lease. - * - * @returns Pointer to the lease if found, NULL+log if not found. - * @param IPv4Addr The IPv4 address. - * @param pMacAddress The MAC address. - */ -VBoxNetDhcpLease *VBoxNetDhcp::newLease(PCRTNETBOOTP pDhcpMsg, size_t cb) -{ - RTMAC const MacAddr = pDhcpMsg->bp_chaddr.Mac; - RTTIMESPEC Now; - RTTimeNow(&Now); - - /* - * Search the possible leases. - * - * We'll try do all the searches in one pass, that is to say, perfect - * match, old lease, and next free/expired lease. 
- */ - VBoxNetDhcpLease *pBest = NULL; - VBoxNetDhcpLease *pOld = NULL; - VBoxNetDhcpLease *pFree = NULL; - - size_t cLeases = m_Leases.size(); - for (size_t i = 0; i < cLeases; i++) - { - VBoxNetDhcpLease *pCur = &m_Leases[i]; - - /* Skip it if no configuration, that means its not in the current config. */ - if (!pCur->m_pCfg) - continue; - - /* best */ - if ( pCur->isOneSpecificClient() - && pCur->m_pCfg->matchesMacAddress(&MacAddr)) { - if ( !pBest - || pBest->m_pCfg->m_MacAddresses.size() < pCur->m_pCfg->m_MacAddresses.size()) - pBest = pCur; - } + VBoxNetALock(this); + ConfigurationManager *confManager = ConfigurationManager::getConfigurationManager(); + confManager->flushAddressList(RTNET_DHCP_OPT_DNS); - /* old lease */ - if ( pCur->m_MacAddress.au16[0] == MacAddr.au16[0] - && pCur->m_MacAddress.au16[1] == MacAddr.au16[1] - && pCur->m_MacAddress.au16[2] == MacAddr.au16[2]) - { - if ( !pOld - || RTTimeSpecGetSeconds(&pCur->m_ExpireTime) > RTTimeSpecGetSeconds(&pFree->m_ExpireTime)) - pOld = pCur; - } + for (AddressList::iterator it = nameservers.begin(); it != nameservers.end(); ++it) + confManager->addToAddressList(RTNET_DHCP_OPT_DNS, *it); - /* expired lease */ - if (!pCur->isInUse(&Now)) - { - if ( !pFree - || RTTimeSpecGetSeconds(&pCur->m_ExpireTime) < RTTimeSpecGetSeconds(&pFree->m_ExpireTime)) - pFree = pCur; + confManager->setString(RTNET_DHCP_OPT_DOMAIN_NAME, domain); } } - VBoxNetDhcpLease *pNew = pBest; - if (!pNew) - pNew = pOld; - if (!pNew) - pNew = pFree; - if (!pNew) - { - debugPrint(0, true, "No more leases."); - return NULL; - } - - /* - * Init the lease. - */ - pNew->m_MacAddress = MacAddr; - pNew->m_xid = pDhcpMsg->bp_xid; - /** @todo extract the client id. */ - - return pNew; + return VINF_SUCCESS; } -/** - * Finds an option. - * - * @returns On success, a pointer to the first byte in the option data (no none - * then it'll be the byte following the 0 size field) and *pcbOpt set - * to the option length. 
- * On failure, NULL is returned and *pcbOpt unchanged. - * - * @param uOption The option to search for. - * @param pDhcpMsg The DHCP message. - * @param cb The size of the message. - * @param pcbOpt Where to store the option size size. Optional. Note - * that this is adjusted if the option length is larger - * than the message buffer. - */ -/* static */ const uint8_t * -VBoxNetDhcp::findOption(uint8_t uOption, PCRTNETBOOTP pDhcpMsg, size_t cb, size_t *pcbOpt) +HRESULT VBoxNetDhcp::HandleEvent(VBoxEventType_T aEventType, IEvent *pEvent) { - Assert(uOption != RTNET_DHCP_OPT_PAD); - - /* - * Validate the DHCP bits and figure the max size of the options in the vendor field. - */ - if (cb <= RT_UOFFSETOF(RTNETBOOTP, bp_vend.Dhcp.dhcp_opts)) - return NULL; - if (pDhcpMsg->bp_vend.Dhcp.dhcp_cookie != RT_H2N_U32_C(RTNET_DHCP_COOKIE)) - return NULL; - size_t cbLeft = cb - RT_UOFFSETOF(RTNETBOOTP, bp_vend.Dhcp.dhcp_opts); - if (cbLeft > RTNET_DHCP_OPT_SIZE) - cbLeft = RTNET_DHCP_OPT_SIZE; - - /* - * Search the vendor field. - */ - bool fExtended = false; - uint8_t const *pb = &pDhcpMsg->bp_vend.Dhcp.dhcp_opts[0]; - while (pb && cbLeft > 0) + switch(aEventType) { - uint8_t uCur = *pb; - if (uCur == RTNET_DHCP_OPT_PAD) - { - cbLeft--; - pb++; - } - else if (cbLeft <= 1) + case VBoxEventType_OnHostNameResolutionConfigurationChange: + fetchAndUpdateDnsInfo(); break; - else - { - size_t cbCur = pb[1]; - if (cbCur > cbLeft - 2) - cbCur = cbLeft - 2; - if (uCur == uOption) - { - if (pcbOpt) - *pcbOpt = cbCur; - return pb+2; - } - pb += cbCur + 2; - cbLeft -= cbCur - 2; - } - } - - /** @todo search extended dhcp option field(s) when present */ - - return NULL; -} - - -/** - * Locates an option with an IPv4 address in the DHCP message. - * - * @returns true and *pIpv4Addr if found, false if not. - * - * @param uOption The option to find. - * @param pDhcpMsg The DHCP message. - * @param cb The size of the message. - * @param pIPv4Addr Where to put the address. 
- */ -/* static */ bool -VBoxNetDhcp::findOptionIPv4Addr(uint8_t uOption, PCRTNETBOOTP pDhcpMsg, size_t cb, PRTNETADDRIPV4 pIPv4Addr) -{ - size_t cbOpt; - uint8_t const *pbOpt = findOption(uOption, pDhcpMsg, cb, &cbOpt); - if (pbOpt) - { - if (cbOpt >= sizeof(RTNETADDRIPV4)) - { - *pIPv4Addr = *(PCRTNETADDRIPV4)pbOpt; - return true; - } - } - return false; -} - - -/** - * Print debug message depending on the m_cVerbosity level. - * - * @param iMinLevel The minimum m_cVerbosity level for this message. - * @param fMsg Whether to dump parts for the current DHCP message. - * @param pszFmt The message format string. - * @param ... Optional arguments. - */ -inline void VBoxNetDhcp::debugPrint(int32_t iMinLevel, bool fMsg, const char *pszFmt, ...) const -{ - if (iMinLevel <= m_cVerbosity) - { - va_list va; - va_start(va, pszFmt); - debugPrintV(iMinLevel, fMsg, pszFmt, va); - va_end(va); - } -} - - -/** - * Print debug message depending on the m_cVerbosity level. - * - * @param iMinLevel The minimum m_cVerbosity level for this message. - * @param fMsg Whether to dump parts for the current DHCP message. - * @param pszFmt The message format string. - * @param va Optional arguments. - */ -void VBoxNetDhcp::debugPrintV(int iMinLevel, bool fMsg, const char *pszFmt, va_list va) const -{ - if (iMinLevel <= m_cVerbosity) - { - va_list vaCopy; /* This dude is *very* special, thus the copy. */ - va_copy(vaCopy, va); - RTStrmPrintf(g_pStdErr, "VBoxNetDHCP: %s: %N\n", iMinLevel >= 2 ? "debug" : "info", pszFmt, &vaCopy); - va_end(vaCopy); - - if ( fMsg - && m_cVerbosity >= 2 - && m_pCurMsg) - { - const char *pszMsg = m_uCurMsgType != UINT8_MAX ? 
debugDhcpName(m_uCurMsgType) : ""; - RTStrmPrintf(g_pStdErr, "VBoxNetDHCP: debug: %8s chaddr=%.6Rhxs ciaddr=%d.%d.%d.%d yiaddr=%d.%d.%d.%d siaddr=%d.%d.%d.%d xid=%#x\n", - pszMsg, - &m_pCurMsg->bp_chaddr, - m_pCurMsg->bp_ciaddr.au8[0], m_pCurMsg->bp_ciaddr.au8[1], m_pCurMsg->bp_ciaddr.au8[2], m_pCurMsg->bp_ciaddr.au8[3], - m_pCurMsg->bp_yiaddr.au8[0], m_pCurMsg->bp_yiaddr.au8[1], m_pCurMsg->bp_yiaddr.au8[2], m_pCurMsg->bp_yiaddr.au8[3], - m_pCurMsg->bp_siaddr.au8[0], m_pCurMsg->bp_siaddr.au8[1], m_pCurMsg->bp_siaddr.au8[2], m_pCurMsg->bp_siaddr.au8[3], - m_pCurMsg->bp_xid); - } } -} - -/** - * Gets the name of given DHCP message type. - * - * @returns Readonly name. - * @param uMsgType The message number. - */ -/* static */ const char *VBoxNetDhcp::debugDhcpName(uint8_t uMsgType) -{ - switch (uMsgType) - { - case 0: return "MT_00"; - case RTNET_DHCP_MT_DISCOVER: return "DISCOVER"; - case RTNET_DHCP_MT_OFFER: return "OFFER"; - case RTNET_DHCP_MT_REQUEST: return "REQUEST"; - case RTNET_DHCP_MT_DECLINE: return "DECLINE"; - case RTNET_DHCP_MT_ACK: return "ACK"; - case RTNET_DHCP_MT_NAC: return "NAC"; - case RTNET_DHCP_MT_RELEASE: return "RELEASE"; - case RTNET_DHCP_MT_INFORM: return "INFORM"; - case 9: return "MT_09"; - case 10: return "MT_0a"; - case 11: return "MT_0b"; - case 12: return "MT_0c"; - case 13: return "MT_0d"; - case 14: return "MT_0e"; - case 15: return "MT_0f"; - case 16: return "MT_10"; - case 17: return "MT_11"; - case 18: return "MT_12"; - case 19: return "MT_13"; - case UINT8_MAX: return "MT_ff"; - default: return "UNKNOWN"; - } + return S_OK; } - - /** * Entry point. */ -extern "C" DECLEXPORT(int) TrustedMain(int argc, char **argv, char **envp) +extern "C" DECLEXPORT(int) TrustedMain(int argc, char **argv) { /* * Instantiate the DHCP server and hand it the options. 
*/ + VBoxNetDhcp *pDhcp = new VBoxNetDhcp(); if (!pDhcp) { @@ -2012,14 +618,16 @@ extern "C" DECLEXPORT(int) TrustedMain(int argc, char **argv, char **envp) if (rc) return rc; + pDhcp->init(); + /* * Try connect the server to the network. */ rc = pDhcp->tryGoOnline(); - if (rc) + if (RT_FAILURE(rc)) { delete pDhcp; - return rc; + return 1; } /* @@ -2030,19 +638,19 @@ extern "C" DECLEXPORT(int) TrustedMain(int argc, char **argv, char **envp) g_pDhcp = NULL; delete pDhcp; - return rc; + return 0; } #ifndef VBOX_WITH_HARDENING -int main(int argc, char **argv, char **envp) +int main(int argc, char **argv) { int rc = RTR3InitExe(argc, &argv, RTR3INIT_FLAGS_SUPLIB); if (RT_FAILURE(rc)) return RTMsgInitFailure(rc); - return TrustedMain(argc, argv, envp); + return TrustedMain(argc, argv); } # ifdef RT_OS_WINDOWS @@ -2061,7 +669,7 @@ static LRESULT CALLBACK WindowProc(HWND hwnd, return DefWindowProc (hwnd, uMsg, wParam, lParam); } -static LPCSTR g_WndClassName = "VBoxNetDHCPClass"; +static LPCWSTR g_WndClassName = L"VBoxNetDHCPClass"; static DWORD WINAPI MsgThreadProc(__in LPVOID lpParameter) { @@ -2139,7 +747,7 @@ int WINAPI WinMain(HINSTANCE hInstance, HINSTANCE hPrevInstance, LPSTR lpCmdLine if(hThread != NULL) CloseHandle(hThread); - return main(__argc, __argv, environ); + return main(__argc, __argv); } # endif /* RT_OS_WINDOWS */ diff --git a/src/VBox/NetworkServices/DHCP/VBoxNetDHCPHardened.cpp b/src/VBox/NetworkServices/DHCP/VBoxNetDHCPHardened.cpp index 7d0e1bef..3142bbd7 100644 --- a/src/VBox/NetworkServices/DHCP/VBoxNetDHCPHardened.cpp +++ b/src/VBox/NetworkServices/DHCP/VBoxNetDHCPHardened.cpp @@ -4,7 +4,7 @@ */ /* - * Copyright (C) 2009 Oracle Corporation + * Copyright (C) 2009-2010 Oracle Corporation * * This file is part of VirtualBox Open Source Edition (OSE), as * available from http://www.virtualbox.org. 
This file is free software; diff --git a/src/VBox/NetworkServices/NAT/Makefile.kmk b/src/VBox/NetworkServices/NAT/Makefile.kmk index 9c821cab..6682b088 100644 --- a/src/VBox/NetworkServices/NAT/Makefile.kmk +++ b/src/VBox/NetworkServices/NAT/Makefile.kmk @@ -17,42 +17,94 @@ SUB_DEPTH = ../../../.. include $(KBUILD_PATH)/subheader.kmk +ifdef VBOX_WITH_LWIP_NAT +# XXX: do not depend on order +ifndef LWIP_SOURCES + include ../../Devices/Network/lwip-new/Makefile.kmk +endif + ifdef VBOX_WITH_HARDENING - PROGRAMS += VBoxNetNATHardened - DLLS += VBoxNetNAT + PROGRAMS += VBoxNetLwipNATHardened + DLLS += VBoxNetLwipNAT else - PROGRAMS += VBoxNetNAT + PROGRAMS += VBoxNetLwipNAT +endif + +ifdef VBOX_WITH_HARDENING + VBoxNetLwipNATHardened_SOURCES += VBoxNetNATHardened.cpp + VBoxNetLwipNATHardened_DEFS += SERVICE_NAME=\"VBoxNetNAT\" + VBoxNetLwipNATHardened_TEMPLATE=VBOXR3HARDENEDEXE + VBoxNetLwipNATHardened_NAME = VBoxNetNAT endif -VBoxNetNAT_TEMPLATE = -VBoxNetNAT_TEMPLATE := VBOXR3$(if-expr defined(VBOX_WITH_HARDENING),,EXE) -VBoxNetNAT_INCS = ../../Devices/Network/slirp -VBoxNetNAT_DEFS = VBOX_WITH_NAT_SERVICE -VBoxNetNAT_SOURCES += VBoxNetNAT.cpp -VBoxNetNAT_DEFS += VBOX_WITH_NAT_SERVICE - -#define def_vbox_slirp_service_cflags -# $(file)_DEFS += VBOX_WITH_NAT_SERVICE -#endef - -define def_vbox_slirp_service_sources - VBoxNetNAT_SOURCES += $1/$(file) -endef -VBOX_NOT_IN_NATSERVICE = Network/DrvNAT.cpp -$(foreach file,$(filter-out $(VBOX_NOT_IN_NATSERVICE), $(VBOX_SLIRP_SOURCES)),$(eval $(call def_vbox_slirp_service_sources, ../../Devices))) -$(foreach file,$(VBOX_SLIRP_ALIAS_SOURCES),$(eval $(call def_vbox_slirp_service_sources, ../../Devices))) -$(foreach file,$(VBOX_SLIRP_BSD_SOURCES),$(eval $(call def_vbox_slirp_service_sources, ../../Devices))) - -$(foreach file,$(addprefix ../../Devices/, $(VBOX_SLIRP_BSD_SOURCES)),$(eval $(call def_vbox_slirp_cflags, ../../Devices/Network))) -$(foreach file,$(addprefix ../../Devices/, $(VBOX_SLIRP_BSD_SOURCES)),$(eval $(call 
def_vbox_slirp_bsd_cflags, ../../Devices/Network))) -$(foreach file,$(addprefix ../../Devices/, $(filter-out $(VBOX_WITH_NAT_SERVICE), $(VBOX_SLIRP_SOURCES))),$(eval $(call def_vbox_slirp_service_cflags, ../../Devices/Network))) -$(foreach file,$(addprefix ../../Devices/, $(filter-out $(VBOX_WITH_NAT_SERVICE), $(VBOX_SLIRP_SOURCES))),$(eval $(call def_vbox_slirp_cflags, ../../Devices/Network))) -$(foreach file,$(addprefix ../../Devices/, $(VBOX_SLIRP_ALIAS_SOURCES)),$(eval $(call def_vbox_slirp_cflags, ../../Devices/Network))) -$(foreach file,$(addprefix ../../Devices/, $(VBOX_SLIRP_ALIAS_SOURCES)),$(eval $(call def_vbox_slirp_alias_cflags, ../../Devices/Network))) - -VBoxNetNAT_SOURCES += ../NetLib/VBoxNetBaseService.cpp - -VBoxNetNAT_LIBS = \ + +VBoxNetLwipNAT_TEMPLATE = +VBoxNetLwipNAT_TEMPLATE := VBOXMAIN$(if-expr defined(VBOX_WITH_HARDENING),DLL,CLIENTEXE) +VBoxNetLwipNAT_NAME = VBoxNetNAT +VBoxNetLwipNAT_DEFS += ${LWIP_DEFS} IPv6 +# VBoxNetLwipNAT_DEFS.linux += WITH_VALGRIND # instrument lwip memp.c +VBoxNetLwipNAT_DEFS.win += VBOX_COM_OUTOFPROC_MODULE _WIN32_WINNT=0x501 # Windows XP +VBoxNetLwipNAT_SOURCES += VBoxNetLwipNAT.cpp \ + ../NetLib/VBoxNetBaseService.cpp \ + ../NetLib/VBoxNetPortForwardString.cpp \ + ../NetLib/VBoxNetIntIf.cpp \ + ../NetLib/VBoxNetUDP.cpp \ + ../NetLib/VBoxNetARP.cpp \ + ../NetLib/ComHostUtils.cpp + +VBoxNetLwipNAT_LIBS = \ $(LIB_RUNTIME) -VBoxNetNAT_LDFLAGS.win = /SUBSYSTEM:windows +VBoxNetLwipNAT_LIBS.solaris += socket nsl +VBoxNetLwipNAT_LDFLAGS.win = /SUBSYSTEM:windows + +# Convince Solaris headers to expose socket stuff we need. 600 would +# also work, but <sys/feature_tests.h> insists on C99 for it and so +# explodes for C++. Note that for 500 it insists on NOT using C99, so +# when some day we decide to use -std=c99 we are in for some fun. 
+VBoxNetLwipNAT_DEFS.solaris += _XOPEN_SOURCE=500 __EXTENSIONS__=1 + +VBoxNetLwipNAT_SOURCES += \ + proxy_pollmgr.c \ + proxy_rtadvd.c \ + proxy.c \ + pxremap.c \ + pxtcp.c \ + pxudp.c \ + pxdns.c \ + fwtcp.c \ + fwudp.c \ + portfwd.c \ + proxy_dhcp6ds.c \ + proxy_tftpd.c + +ifeq ($(KBUILD_TARGET),win) + # unprivileged Icmp API + VBoxNetLwipNAT_SOURCES += pxping_win.c +else + # raw sockets + VBoxNetLwipNAT_SOURCES += pxping.c +endif + +# ifeq ($(VBOX_WITH_HARDENING),) +# ifn1of ($(KBUILD_TARGET), darwin win) +# # helper for debugging unprivileged +# VBoxNetLwipNAT_DEFS += VBOX_RAWSOCK_DEBUG_HELPER +# VBoxNetLwipNAT_SOURCES += getrawsock.c +# endif +# endif + +VBoxNetLwipNAT_SOURCES.darwin += rtmon_bsd.c +VBoxNetLwipNAT_SOURCES.freebsd += rtmon_bsd.c +VBoxNetLwipNAT_SOURCES.linux += rtmon_linux.c +VBoxNetLwipNAT_SOURCES.solaris += rtmon_bsd.c +VBoxNetLwipNAT_SOURCES.win += rtmon_win.c + +VBoxNetLwipNAT_SOURCES.win += \ + RTWinPoll.cpp \ + RTWinSocketPair.cpp + +VBoxNetLwipNAT_INCS += . # for lwipopts.h +$(eval $(call def_vbox_lwip_public, \ + VBoxNetLwipNAT, ../../Devices/Network/lwip-new)) +endif include $(FILE_KBUILD_SUB_FOOTER) diff --git a/src/VBox/NetworkServices/NAT/RTWinPoll.cpp b/src/VBox/NetworkServices/NAT/RTWinPoll.cpp new file mode 100644 index 00000000..bff30506 --- /dev/null +++ b/src/VBox/NetworkServices/NAT/RTWinPoll.cpp @@ -0,0 +1,143 @@ +/* -*- indent-tabs-mode: nil; -*- */ +#include <iprt/asm.h> +#include <iprt/assert.h> +#include <iprt/cdefs.h> +#include <iprt/err.h> +#include <iprt/string.h> + +#include <VBox/err.h> + +#include <Winsock2.h> +#include <Windows.h> +#include "winpoll.h" + +static HANDLE g_hNetworkEvent; + +int +RTWinPoll(struct pollfd *pFds, unsigned int nfds, int timeout, int *pNready) +{ + AssertPtrReturn(pFds, VERR_INVALID_PARAMETER); + + if (g_hNetworkEvent == WSA_INVALID_EVENT) + { + g_hNetworkEvent = WSACreateEvent(); + AssertReturn(g_hNetworkEvent != WSA_INVALID_EVENT, VERR_INTERNAL_ERROR); + } + + for (unsigned int i = 0; i 
< nfds; ++i) + { + long eventMask = 0; + short pollEvents = pFds[i].events; + + /* clean revents */ + pFds[i].revents = 0; + + /* ignore invalid sockets */ + if (pFds[i].fd == INVALID_SOCKET) + continue; + + /** + * POLLIN Data other than high priority data may be read without blocking. + * This is equivalent to ( POLLRDNORM | POLLRDBAND ). + * POLLRDBAND Priority data may be read without blocking. + * POLLRDNORM Normal data may be read without blocking. + */ + if (pollEvents & POLLIN) + eventMask |= FD_READ | FD_ACCEPT; + + /** + * POLLOUT Normal data may be written without blocking. This is equivalent + * to POLLWRNORM. + * POLLWRNORM Normal data may be written without blocking. + */ + if (pollEvents & POLLOUT) + eventMask |= FD_WRITE | FD_CONNECT; + + /** + * This is "moral" equivalent to POLLHUP. + */ + eventMask |= FD_CLOSE; + WSAEventSelect(pFds[i].fd, g_hNetworkEvent, eventMask); + } + + DWORD index = WSAWaitForMultipleEvents(1, + &g_hNetworkEvent, + FALSE, + timeout == RT_INDEFINITE_WAIT ? 
WSA_INFINITE : timeout, + FALSE); + if (index != WSA_WAIT_EVENT_0) + { + if (index == WSA_WAIT_TIMEOUT) + return VERR_TIMEOUT; + } + + int nready = 0; + for (unsigned int i = 0; i < nfds; ++i) + { + short revents = 0; + WSANETWORKEVENTS NetworkEvents; + int err; + + if (pFds[i].fd == INVALID_SOCKET) + continue; + + RT_ZERO(NetworkEvents); + + err = WSAEnumNetworkEvents(pFds[i].fd, + g_hNetworkEvent, + &NetworkEvents); + + if (err == SOCKET_ERROR) + { + if (WSAGetLastError() == WSAENOTSOCK) + { + pFds[i].revents = POLLNVAL; + ++nready; + } + continue; + } + + /* deassociate socket with event */ + WSAEventSelect(pFds[i].fd, g_hNetworkEvent, 0); + + if (NetworkEvents.lNetworkEvents & (FD_READ|FD_ACCEPT)) + { + if ( NetworkEvents.iErrorCode[FD_READ_BIT] != 0 + || NetworkEvents.iErrorCode[FD_ACCEPT_BIT] != 0) + revents |= POLLERR; + + revents |= POLLIN; + } + + if (NetworkEvents.lNetworkEvents & (FD_WRITE|FD_CONNECT)) + { + if ( NetworkEvents.iErrorCode[FD_WRITE_BIT] != 0 + || NetworkEvents.iErrorCode[FD_CONNECT_BIT] != 0) + revents |= POLLERR; + + revents |= POLLOUT; + } + + if (NetworkEvents.lNetworkEvents & FD_CLOSE) + { + if (NetworkEvents.iErrorCode[FD_CLOSE_BIT] != 0) + revents |= POLLERR; + + revents |= POLLHUP; + } + + /* paranoid */ + revents &= (pFds[i].events | POLLHUP | POLLERR); + if (revents != 0) + { + pFds[i].revents = revents; + ++nready; + } + } + WSAResetEvent(g_hNetworkEvent); + + if (pNready) + *pNready = nready; + + return VINF_SUCCESS; +} diff --git a/src/VBox/NetworkServices/NAT/RTWinSocketPair.cpp b/src/VBox/NetworkServices/NAT/RTWinSocketPair.cpp new file mode 100644 index 00000000..d5eb0ae0 --- /dev/null +++ b/src/VBox/NetworkServices/NAT/RTWinSocketPair.cpp @@ -0,0 +1,197 @@ +#include <iprt/asm.h> +#include <iprt/assert.h> +#include <iprt/cdefs.h> +#include <iprt/err.h> + +#include <VBox/err.h> + +#include <Winsock2.h> +#include <Windows.h> + +#include <stdio.h> +#include <iprt/log.h> + +extern "C" int RTWinSocketPair(int domain, int type, 
int protocol, SOCKET socket_vector[2]) +{ + LogFlowFunc(("ENTER: domain:%d, type:%d, protocol:%d, socket_vector:%p\n", + domain, type, protocol, socket_vector)); + switch (domain) + { + case AF_INET: + break; + case AF_INET6: /* I dobt we really need it. */ + default: + AssertMsgFailedReturn(("Unsuported domain:%d\n", domain), + VERR_INVALID_PARAMETER); + } + + switch(type) + { + case SOCK_STREAM: + case SOCK_DGRAM: + break; + default: + AssertMsgFailedReturn(("Unsuported type:%d\n", type), + VERR_INVALID_PARAMETER); + } + + AssertPtrReturn(socket_vector, VERR_INVALID_PARAMETER); + if (!socket_vector) + return VERR_INVALID_PARAMETER; + + socket_vector[0] = socket_vector[1] = INVALID_SOCKET; + + SOCKET listener = INVALID_SOCKET; + + union { + struct sockaddr_in in_addr; + struct sockaddr addr; + } sa[2]; + + int cb = sizeof(sa); + memset(&sa, 0, cb); + + sa[0].in_addr.sin_family = domain; + sa[0].in_addr.sin_addr.s_addr = RT_H2N_U32(INADDR_LOOPBACK); + sa[0].in_addr.sin_port = 0; + cb = sizeof(sa[0]); + + if (type == SOCK_STREAM) + { + listener = WSASocket(domain, type, protocol, 0, NULL, 0); + + if (listener == INVALID_SOCKET) + { + return VERR_INTERNAL_ERROR; + } + + int reuse = 1; + cb = sizeof(int); + int rc = setsockopt(listener, SOL_SOCKET, SO_REUSEADDR, (char *)&reuse, cb); + + if (rc) + { + goto close_socket; + } + + cb = sizeof(sa[0]); + rc = bind(listener, &sa[0].addr, cb); + if(rc) + { + goto close_socket; + } + + memset(&sa[0], 0, cb); + rc = getsockname(listener, &sa[0].addr, &cb); + if (rc) + { + goto close_socket; + } + + rc = listen(listener, 1); + if (rc) + { + goto close_socket; + } + + socket_vector[0] = WSASocket(domain, type, protocol, 0, NULL, 0); + if (socket_vector[0] == INVALID_SOCKET) + { + goto close_socket; + } + + rc = connect(socket_vector[0], &sa[0].addr, cb); + if (rc) + goto close_socket; + + + socket_vector[1] = accept(listener, NULL, NULL); + if (socket_vector[1] == INVALID_SOCKET) + { + goto close_socket; + } + + 
closesocket(listener); + } + else + { + socket_vector[0] = WSASocket(domain, type, protocol, 0, NULL, 0); + + cb = sizeof(sa[0]); + int rc = bind(socket_vector[0], &sa[0].addr, cb); + Assert(rc != SOCKET_ERROR); + if (rc == SOCKET_ERROR) + { + goto close_socket; + } + + sa[1].in_addr.sin_family = domain; + sa[1].in_addr.sin_addr.s_addr = RT_H2N_U32(INADDR_LOOPBACK); + sa[1].in_addr.sin_port = 0; + + socket_vector[1] = WSASocket(domain, type, protocol, 0, NULL, 0); + rc = bind(socket_vector[1], &sa[1].addr, cb); + Assert(rc != SOCKET_ERROR); + if (rc == SOCKET_ERROR) + { + goto close_socket; + } + + { + u_long mode = 0; + rc = ioctlsocket(socket_vector[0], FIONBIO, &mode); + AssertMsgReturn(rc != SOCKET_ERROR, + ("ioctl error: %d\n", WSAGetLastError()), + VERR_INTERNAL_ERROR); + + rc = ioctlsocket(socket_vector[1], FIONBIO, &mode); + AssertMsgReturn(rc != SOCKET_ERROR, + ("ioctl error: %d\n", WSAGetLastError()), + VERR_INTERNAL_ERROR); + } + + memset(&sa, 0, 2 * cb); + rc = getsockname(socket_vector[0], &sa[0].addr, &cb); + Assert(rc != SOCKET_ERROR); + if (rc == SOCKET_ERROR) + { + goto close_socket; + } + + rc = getsockname(socket_vector[1], &sa[1].addr, &cb); + Assert(rc != SOCKET_ERROR); + if (rc == SOCKET_ERROR) + { + goto close_socket; + } + + rc = connect(socket_vector[0], &sa[1].addr, cb); + Assert(rc != SOCKET_ERROR); + if (rc == SOCKET_ERROR) + { + goto close_socket; + } + + rc = connect(socket_vector[1], &sa[0].addr, cb); + Assert(rc != SOCKET_ERROR); + if (rc == SOCKET_ERROR) + { + goto close_socket; + } + } + LogFlowFuncLeaveRC(VINF_SUCCESS); + return VINF_SUCCESS; + +close_socket: + if (listener != INVALID_SOCKET) + closesocket(listener); + + if (socket_vector[0] != INVALID_SOCKET) + closesocket(socket_vector[0]); + + if (socket_vector[1] != INVALID_SOCKET) + closesocket(socket_vector[1]); + + LogFlowFuncLeaveRC(VERR_INTERNAL_ERROR); + return VERR_INTERNAL_ERROR; +} diff --git a/src/VBox/NetworkServices/NAT/VBoxNetLwipNAT.cpp 
b/src/VBox/NetworkServices/NAT/VBoxNetLwipNAT.cpp new file mode 100644 index 00000000..01ee26e6 --- /dev/null +++ b/src/VBox/NetworkServices/NAT/VBoxNetLwipNAT.cpp @@ -0,0 +1,1420 @@ +/* $Id: VBoxNetLwipNAT.cpp $ */ +/** @file + * VBoxNetNAT - NAT Service for connecting to IntNet. + */ + +/* + * Copyright (C) 2009 Oracle Corporation + * + * This file is part of VirtualBox Open Source Edition (OSE), as + * available from http://www.virtualbox.org. This file is free software; + * you can redistribute it and/or modify it under the terms of the GNU + * General Public License (GPL) as published by the Free Software + * Foundation, in version 2 as it comes in the "COPYING" file of the + * VirtualBox OSE distribution. VirtualBox OSE is distributed in the + * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind. + */ + +/* Must be included before winutils.h (lwip/def.h), otherwise Windows build breaks. */ +#include <iprt/cpp/mem.h> + +#include "winutils.h" + +#include <VBox/com/assert.h> +#include <VBox/com/com.h> +#include <VBox/com/listeners.h> +#include <VBox/com/string.h> +#include <VBox/com/Guid.h> +#include <VBox/com/array.h> +#include <VBox/com/ErrorInfo.h> +#include <VBox/com/errorprint.h> +#include <VBox/com/VirtualBox.h> + +#include <iprt/net.h> +#include <iprt/initterm.h> +#include <iprt/alloca.h> +#ifndef RT_OS_WINDOWS +# include <arpa/inet.h> +#endif +#include <iprt/err.h> +#include <iprt/time.h> +#include <iprt/timer.h> +#include <iprt/thread.h> +#include <iprt/stream.h> +#include <iprt/path.h> +#include <iprt/param.h> +#include <iprt/pipe.h> +#include <iprt/getopt.h> +#include <iprt/string.h> +#include <iprt/mem.h> +#include <iprt/message.h> +#include <iprt/req.h> +#include <iprt/file.h> +#include <iprt/semaphore.h> +#include <iprt/cpp/utils.h> +#define LOG_GROUP LOG_GROUP_NAT_SERVICE +#include <VBox/log.h> + +#include <VBox/sup.h> +#include <VBox/intnet.h> +#include <VBox/intnetinline.h> +#include <VBox/vmm/pdmnetinline.h> +#include 
<VBox/vmm/vmm.h> +#include <VBox/version.h> + +#ifndef RT_OS_WINDOWS +# include <sys/poll.h> +# include <sys/socket.h> +# include <netinet/in.h> +# ifdef RT_OS_LINUX +# include <linux/icmp.h> /* ICMP_FILTER */ +# endif +# include <netinet/icmp6.h> +#endif + +#include <map> +#include <vector> +#include <string> + +#include <stdio.h> + +#include "../NetLib/VBoxNetLib.h" +#include "../NetLib/VBoxNetBaseService.h" +#include "../NetLib/utils.h" +#include "VBoxLwipCore.h" + +extern "C" +{ +/* bunch of LWIP headers */ +#include "lwip/sys.h" +#include "lwip/pbuf.h" +#include "lwip/netif.h" +#include "lwip/ethip6.h" +#include "lwip/nd6.h" // for proxy_na_hook +#include "lwip/mld6.h" +#include "lwip/tcpip.h" +#include "netif/etharp.h" + +#include "proxy.h" +#include "pxremap.h" +#include "portfwd.h" +} + + +#if defined(VBOX_RAWSOCK_DEBUG_HELPER) \ + && (defined(VBOX_WITH_HARDENING) \ + || defined(RT_OS_WINDOWS) \ + || defined(RT_OS_DARWIN)) +# error Have you forgotten to turn off VBOX_RAWSOCK_DEBUG_HELPER? 
/**
 * NAT network service built on lwIP.
 *
 * Derives from VBoxNetBaseService (network plumbing, option parsing) and
 * NATNetworkEventAdapter (receives Main events through HandleEvent()).
 */
class VBoxNetLwipNAT: public VBoxNetBaseService, public NATNetworkEventAdapter
{
    friend class NATNetworkListener;
  public:
    /** Takes the ICMP sockets opened by TrustedMain() and stores them in the proxy options. */
    VBoxNetLwipNAT(SOCKET icmpsock4, SOCKET icmpsock6);
    virtual ~VBoxNetLwipNAT();
    void usage(){ /* @todo: should be implemented */ };
    /** Runs the base service loop, then finalizes lwIP and drops the rule lists. */
    int run();
    /** Reads the network configuration from Main, goes online and starts lwIP. */
    virtual int init(void);
    /** Handles the --port-forward4 ('p') and --port-forward6 ('P') options. */
    virtual int parseOpt(int rc, const RTGETOPTUNION& getOptVal);
    /* VBoxNetNAT always needs Main */
    virtual bool isMainNeeded() const { return true; }
    /** Copies one frame into a pbuf and hands it to the lwIP interface input function. */
    virtual int processFrame(void *, size_t);
    /** Carves a GSO frame into segments and feeds each one to processFrame(). */
    virtual int processGSO(PCPDMNETWORKGSO, size_t);
    virtual int processUDP(void *, size_t) { return VERR_IGNORED; }

  private:
    /** Options handed to proxy_init(). */
    struct proxy_options m_ProxyOptions;
    /** Storage for m_ProxyOptions.src4 (set in init() from the "NAT/<net>/SourceIp4" extra data). */
    struct sockaddr_in m_src4;
    struct sockaddr_in6 m_src6;
    /**
     * place for registered local interfaces.
     */
    ip4_lomap m_lo2off[10];
    /** Descriptor referring to m_lo2off; published via m_ProxyOptions.lomap_desc. */
    ip4_lomap_desc m_loOptDescriptor;

    /** MTU assigned to the lwIP interface in netifInit(). */
    uint16_t m_u16Mtu;
    /** The lwIP network interface registered with netif_add(). */
    netif m_LwipNetIf;

    /* Our NAT network descriptor in Main */
    ComPtr<INATNetwork> m_net;
    ComNatListenerPtr m_listener;

    /** Host object, used by getHostNameservers(). */
    ComPtr<IHost> m_host;
    ComNatListenerPtr m_vboxListener;
    /** Scratch segment array used by netifLinkoutput() to describe a pbuf chain. */
    static INTNETSEG aXmitSeg[64];

    HRESULT HandleEvent(VBoxEventType_T aEventType, IEvent *pEvent);

    /** Returns a NULL-terminated, heap-allocated array of nameserver strings, or NULL. */
    const char **getHostNameservers();

    /* Only for debug needs, by default NAT service should load rules from SVC
     * on startup, and then on sync them on events.
     */
    bool fDontLoadRulesOnStartup;
    static void onLwipTcpIpInit(void *arg);
    static void onLwipTcpIpFini(void *arg);
    static err_t netifInit(netif *pNetif);
    static err_t netifLinkoutput(netif *pNetif, pbuf *pBuf);
    static int intNetThreadRecv(RTTHREAD, void *);

    /** IPv4 and IPv6 port-forwarding rules known to the service. */
    VECNATSERVICEPF m_vecPortForwardRule4;
    VECNATSERVICEPF m_vecPortForwardRule6;

    static int natServicePfRegister(NATSEVICEPORTFORWARDRULE& natServicePf);
    static int natServiceProcessRegisteredPf(VECNATSERVICEPF& vecPf);
};
+ */ +HRESULT VBoxNetLwipNAT::HandleEvent(VBoxEventType_T aEventType, + IEvent *pEvent) +{ + HRESULT hrc = S_OK; + switch (aEventType) + { + case VBoxEventType_OnNATNetworkSetting: + { + ComPtr<INATNetworkSettingEvent> evSettings(pEvent); + // XXX: only handle IPv6 default route for now + + if (!m_ProxyOptions.ipv6_enabled) + { + break; + } + + BOOL fIPv6DefaultRoute = FALSE; + hrc = evSettings->COMGETTER(AdvertiseDefaultIPv6RouteEnabled)(&fIPv6DefaultRoute); + AssertReturn(SUCCEEDED(hrc), hrc); + + if (m_ProxyOptions.ipv6_defroute == fIPv6DefaultRoute) + { + break; + } + + m_ProxyOptions.ipv6_defroute = fIPv6DefaultRoute; + tcpip_callback_with_block(proxy_rtadvd_do_quick, &m_LwipNetIf, 0); + + break; + } + + case VBoxEventType_OnNATNetworkPortForward: + { + com::Bstr name, strHostAddr, strGuestAddr; + LONG lHostPort, lGuestPort; + BOOL fCreateFW, fIPv6FW; + NATProtocol_T proto = NATProtocol_TCP; + + + ComPtr<INATNetworkPortForwardEvent> pfEvt = pEvent; + + hrc = pfEvt->COMGETTER(Create)(&fCreateFW); + AssertReturn(SUCCEEDED(hrc), hrc); + + hrc = pfEvt->COMGETTER(Ipv6)(&fIPv6FW); + AssertReturn(SUCCEEDED(hrc), hrc); + + hrc = pfEvt->COMGETTER(Name)(name.asOutParam()); + AssertReturn(SUCCEEDED(hrc), hrc); + + hrc = pfEvt->COMGETTER(Proto)(&proto); + AssertReturn(SUCCEEDED(hrc), hrc); + + hrc = pfEvt->COMGETTER(HostIp)(strHostAddr.asOutParam()); + AssertReturn(SUCCEEDED(hrc), hrc); + + hrc = pfEvt->COMGETTER(HostPort)(&lHostPort); + AssertReturn(SUCCEEDED(hrc), hrc); + + hrc = pfEvt->COMGETTER(GuestIp)(strGuestAddr.asOutParam()); + AssertReturn(SUCCEEDED(hrc), hrc); + + hrc = pfEvt->COMGETTER(GuestPort)(&lGuestPort); + AssertReturn(SUCCEEDED(hrc), hrc); + + VECNATSERVICEPF& rules = (fIPv6FW ? 
+ m_vecPortForwardRule6 : + m_vecPortForwardRule4); + + NATSEVICEPORTFORWARDRULE r; + RT_ZERO(r); + + r.Pfr.fPfrIPv6 = fIPv6FW; + + switch (proto) + { + case NATProtocol_TCP: + r.Pfr.iPfrProto = IPPROTO_TCP; + break; + case NATProtocol_UDP: + r.Pfr.iPfrProto = IPPROTO_UDP; + break; + + default: + LogRel(("Event: %s %s port-forwarding rule \"%s\":" + " invalid protocol %d\n", + fCreateFW ? "Add" : "Remove", + fIPv6FW ? "IPv6" : "IPv4", + com::Utf8Str(name).c_str(), + (int)proto)); + goto port_forward_done; + } + + LogRel(("Event: %s %s port-forwarding rule \"%s\":" + " %s %s%s%s:%d -> %s%s%s:%d\n", + fCreateFW ? "Add" : "Remove", + fIPv6FW ? "IPv6" : "IPv4", + com::Utf8Str(name).c_str(), + proto == NATProtocol_TCP ? "TCP" : "UDP", + /* from */ + fIPv6FW ? "[" : "", + com::Utf8Str(strHostAddr).c_str(), + fIPv6FW ? "]" : "", + lHostPort, + /* to */ + fIPv6FW ? "[" : "", + com::Utf8Str(strGuestAddr).c_str(), + fIPv6FW ? "]" : "", + lGuestPort)); + + if (name.length() > sizeof(r.Pfr.szPfrName)) + { + hrc = E_INVALIDARG; + goto port_forward_done; + } + + RTStrPrintf(r.Pfr.szPfrName, sizeof(r.Pfr.szPfrName), + "%s", com::Utf8Str(name).c_str()); + + RTStrPrintf(r.Pfr.szPfrHostAddr, sizeof(r.Pfr.szPfrHostAddr), + "%s", com::Utf8Str(strHostAddr).c_str()); + + /* XXX: limits should be checked */ + r.Pfr.u16PfrHostPort = (uint16_t)lHostPort; + + RTStrPrintf(r.Pfr.szPfrGuestAddr, sizeof(r.Pfr.szPfrGuestAddr), + "%s", com::Utf8Str(strGuestAddr).c_str()); + + /* XXX: limits should be checked */ + r.Pfr.u16PfrGuestPort = (uint16_t)lGuestPort; + + if (fCreateFW) /* Addition */ + { + int rc = natServicePfRegister(r); + if (RT_SUCCESS(rc)) + rules.push_back(r); + } + else /* Deletion */ + { + ITERATORNATSERVICEPF it; + for (it = rules.begin(); it != rules.end(); ++it) + { + /* compare */ + NATSEVICEPORTFORWARDRULE& natFw = *it; + if ( natFw.Pfr.iPfrProto == r.Pfr.iPfrProto + && natFw.Pfr.u16PfrHostPort == r.Pfr.u16PfrHostPort + && (strncmp(natFw.Pfr.szPfrHostAddr, 
r.Pfr.szPfrHostAddr, INET6_ADDRSTRLEN) == 0) + && natFw.Pfr.u16PfrGuestPort == r.Pfr.u16PfrGuestPort + && (strncmp(natFw.Pfr.szPfrGuestAddr, r.Pfr.szPfrGuestAddr, INET6_ADDRSTRLEN) == 0)) + { + RTCMemAutoPtr<fwspec> pFwCopy; + if (RT_UNLIKELY(!pFwCopy.alloc())) + break; + + memcpy(pFwCopy.get(), &natFw.FWSpec, sizeof(natFw.FWSpec)); + + int status = portfwd_rule_del(pFwCopy.get()); + if (status != 0) + break; + + pFwCopy.release(); /* owned by lwip thread now */ + rules.erase(it); + break; + } + } /* loop over vector elements */ + } /* condition add or delete */ + port_forward_done: + /* clean up strings */ + name.setNull(); + strHostAddr.setNull(); + strGuestAddr.setNull(); + break; + } + + case VBoxEventType_OnHostNameResolutionConfigurationChange: + { + const char **ppcszNameServers = getHostNameservers(); + err_t error; + + error = tcpip_callback_with_block(pxdns_set_nameservers, + ppcszNameServers, + /* :block */ 0); + if (error != ERR_OK && ppcszNameServers != NULL) + { + RTMemFree(ppcszNameServers); + } + break; + } + } + return hrc; +} + + +void VBoxNetLwipNAT::onLwipTcpIpInit(void* arg) +{ + AssertPtrReturnVoid(arg); + VBoxNetLwipNAT *pNat = static_cast<VBoxNetLwipNAT *>(arg); + + HRESULT hrc = com::Initialize(); + Assert(!FAILED(hrc)); + + proxy_arp_hook = pxremap_proxy_arp; + proxy_ip4_divert_hook = pxremap_ip4_divert; + + proxy_na_hook = pxremap_proxy_na; + proxy_ip6_divert_hook = pxremap_ip6_divert; + + /* lwip thread */ + RTNETADDRIPV4 network; + RTNETADDRIPV4 address = g_pLwipNat->getIpv4Address(); + RTNETADDRIPV4 netmask = g_pLwipNat->getIpv4Netmask(); + network.u = address.u & netmask.u; + + ip_addr LwipIpAddr, LwipIpNetMask, LwipIpNetwork; + + memcpy(&LwipIpAddr, &address, sizeof(ip_addr)); + memcpy(&LwipIpNetMask, &netmask, sizeof(ip_addr)); + memcpy(&LwipIpNetwork, &network, sizeof(ip_addr)); + + netif *pNetif = netif_add(&g_pLwipNat->m_LwipNetIf /* Lwip Interface */, + &LwipIpAddr /* IP address*/, + &LwipIpNetMask /* Network mask */, + 
&LwipIpAddr /* gateway address, @todo: is self IP acceptable? */, + g_pLwipNat /* state */, + VBoxNetLwipNAT::netifInit /* netif_init_fn */, + tcpip_input /* netif_input_fn */); + + AssertPtrReturnVoid(pNetif); + + LogRel(("netif %c%c%d: mac %RTmac\n", + pNetif->name[0], pNetif->name[1], pNetif->num, + pNetif->hwaddr)); + LogRel(("netif %c%c%d: inet %RTnaipv4 netmask %RTnaipv4\n", + pNetif->name[0], pNetif->name[1], pNetif->num, + pNetif->ip_addr, pNetif->netmask)); + for (int i = 0; i < LWIP_IPV6_NUM_ADDRESSES; ++i) { + if (!ip6_addr_isinvalid(netif_ip6_addr_state(pNetif, i))) { + LogRel(("netif %c%c%d: inet6 %RTnaipv6\n", + pNetif->name[0], pNetif->name[1], pNetif->num, + netif_ip6_addr(pNetif, i))); + } + } + + netif_set_up(pNetif); + netif_set_link_up(pNetif); + + if (pNat->m_ProxyOptions.ipv6_enabled) { + /* + * XXX: lwIP currently only ever calls mld6_joingroup() in + * nd6_tmr() for fresh tentative addresses, which is a wrong place + * to do it - but I'm not keen on fixing this properly for now + * (with correct handling of interface up and down transitions, + * etc). So stick it here as a kludge. + */ + for (int i = 0; i <= 1; ++i) { + ip6_addr_t *paddr = netif_ip6_addr(pNetif, i); + + ip6_addr_t solicited_node_multicast_address; + ip6_addr_set_solicitednode(&solicited_node_multicast_address, + paddr->addr[3]); + mld6_joingroup(paddr, &solicited_node_multicast_address); + } + + /* + * XXX: We must join the solicited-node multicast for the + * addresses we do IPv6 NA-proxy for. We map IPv6 loopback to + * proxy address + 1. We only need the low 24 bits, and those are + * fixed. 
+ */ + { + ip6_addr_t solicited_node_multicast_address; + + ip6_addr_set_solicitednode(&solicited_node_multicast_address, + /* last 24 bits of the address */ + PP_HTONL(0x00000002)); + mld6_netif_joingroup(pNetif, &solicited_node_multicast_address); + } + } + + proxy_init(&g_pLwipNat->m_LwipNetIf, &g_pLwipNat->m_ProxyOptions); + + natServiceProcessRegisteredPf(g_pLwipNat->m_vecPortForwardRule4); + natServiceProcessRegisteredPf(g_pLwipNat->m_vecPortForwardRule6); +} + + +void VBoxNetLwipNAT::onLwipTcpIpFini(void* arg) +{ + AssertPtrReturnVoid(arg); + VBoxNetLwipNAT *pThis = (VBoxNetLwipNAT *)arg; + + /* XXX: proxy finalization */ + netif_set_link_down(&g_pLwipNat->m_LwipNetIf); + netif_set_down(&g_pLwipNat->m_LwipNetIf); + netif_remove(&g_pLwipNat->m_LwipNetIf); + +} + +/* + * Callback for netif_add() to initialize the interface. + */ +err_t VBoxNetLwipNAT::netifInit(netif *pNetif) +{ + err_t rcLwip = ERR_OK; + + AssertPtrReturn(pNetif, ERR_ARG); + + VBoxNetLwipNAT *pNat = static_cast<VBoxNetLwipNAT *>(pNetif->state); + AssertPtrReturn(pNat, ERR_ARG); + + LogFlowFunc(("ENTER: pNetif[%c%c%d]\n", pNetif->name[0], pNetif->name[1], pNetif->num)); + /* validity */ + AssertReturn( pNetif->name[0] == 'N' + && pNetif->name[1] == 'T', ERR_ARG); + + + pNetif->hwaddr_len = sizeof(RTMAC); + RTMAC mac = g_pLwipNat->getMacAddress(); + memcpy(pNetif->hwaddr, &mac, sizeof(RTMAC)); + + pNat->m_u16Mtu = 1500; // XXX: FIXME + pNetif->mtu = pNat->m_u16Mtu; + + pNetif->flags = NETIF_FLAG_BROADCAST + | NETIF_FLAG_ETHARP /* Don't bother driver with ARP and let Lwip resolve ARP handling */ + | NETIF_FLAG_ETHERNET; /* Lwip works with ethernet too */ + + pNetif->linkoutput = netifLinkoutput; /* ether-level-pipe */ + pNetif->output = etharp_output; /* ip-pipe */ + + if (pNat->m_ProxyOptions.ipv6_enabled) { + pNetif->output_ip6 = ethip6_output; + + /* IPv6 link-local address in slot 0 */ + netif_create_ip6_linklocal_address(pNetif, /* :from_mac_48bit */ 1); + netif_ip6_addr_set_state(pNetif, 
0, IP6_ADDR_PREFERRED); // skip DAD + + /* + * RFC 4193 Locally Assigned Global ID (ULA) in slot 1 + * [fd17:625c:f037:XXXX::1] where XXXX, 16 bit Subnet ID, are two + * bytes from the middle of the IPv4 address, e.g. :dead: for + * 10.222.173.1 + */ + u8_t nethi = ip4_addr2(&pNetif->ip_addr); + u8_t netlo = ip4_addr3(&pNetif->ip_addr); + + ip6_addr_t *paddr = netif_ip6_addr(pNetif, 1); + IP6_ADDR(paddr, 0, 0xFD, 0x17, 0x62, 0x5C); + IP6_ADDR(paddr, 1, 0xF0, 0x37, nethi, netlo); + IP6_ADDR(paddr, 2, 0x00, 0x00, 0x00, 0x00); + IP6_ADDR(paddr, 3, 0x00, 0x00, 0x00, 0x01); + netif_ip6_addr_set_state(pNetif, 1, IP6_ADDR_PREFERRED); + +#if LWIP_IPV6_SEND_ROUTER_SOLICIT + pNetif->rs_count = 0; +#endif + } + + LogFlowFunc(("LEAVE: %d\n", rcLwip)); + return rcLwip; +} + + +err_t VBoxNetLwipNAT::netifLinkoutput(netif *pNetif, pbuf *pPBuf) +{ + AssertPtrReturn(pNetif, ERR_ARG); + AssertPtrReturn(pPBuf, ERR_ARG); + + VBoxNetLwipNAT *self = static_cast<VBoxNetLwipNAT *>(pNetif->state); + AssertPtrReturn(self, ERR_IF); + AssertReturn(self == g_pLwipNat, ERR_ARG); + + LogFlowFunc(("ENTER: pNetif[%c%c%d], pPbuf:%p\n", + pNetif->name[0], + pNetif->name[1], + pNetif->num, + pPBuf)); + + RT_ZERO(VBoxNetLwipNAT::aXmitSeg); + + size_t idx = 0; + for (struct pbuf *q = pPBuf; q != NULL; q = q->next, ++idx) + { + AssertReturn(idx < RT_ELEMENTS(VBoxNetLwipNAT::aXmitSeg), ERR_MEM); + +#if ETH_PAD_SIZE + if (q == pPBuf) + { + VBoxNetLwipNAT::aXmitSeg[idx].pv = (uint8_t *)q->payload + ETH_PAD_SIZE; + VBoxNetLwipNAT::aXmitSeg[idx].cb = q->len - ETH_PAD_SIZE; + } + else +#endif + { + VBoxNetLwipNAT::aXmitSeg[idx].pv = q->payload; + VBoxNetLwipNAT::aXmitSeg[idx].cb = q->len; + } + } + + int rc = self->sendBufferOnWire(VBoxNetLwipNAT::aXmitSeg, idx, + pPBuf->tot_len - ETH_PAD_SIZE); + AssertRCReturn(rc, ERR_IF); + + self->flushWire(); + + LogFlowFunc(("LEAVE: %d\n", ERR_OK)); + return ERR_OK; +} + + +VBoxNetLwipNAT::VBoxNetLwipNAT(SOCKET icmpsock4, SOCKET icmpsock6) : 
VBoxNetBaseService("VBoxNetNAT", "nat-network") +{ + LogFlowFuncEnter(); + + m_ProxyOptions.ipv6_enabled = 0; + m_ProxyOptions.ipv6_defroute = 0; + m_ProxyOptions.icmpsock4 = icmpsock4; + m_ProxyOptions.icmpsock6 = icmpsock6; + m_ProxyOptions.tftp_root = NULL; + m_ProxyOptions.src4 = NULL; + m_ProxyOptions.src6 = NULL; + memset(&m_src4, 0, sizeof(m_src4)); + memset(&m_src6, 0, sizeof(m_src6)); + m_src4.sin_family = AF_INET; + m_src6.sin6_family = AF_INET6; +#if HAVE_SA_LEN + m_src4.sin_len = sizeof(m_src4); + m_src6.sin6_len = sizeof(m_src6); +#endif + m_ProxyOptions.nameservers = NULL; + + m_LwipNetIf.name[0] = 'N'; + m_LwipNetIf.name[1] = 'T'; + + RTMAC mac; + mac.au8[0] = 0x52; + mac.au8[1] = 0x54; + mac.au8[2] = 0; + mac.au8[3] = 0x12; + mac.au8[4] = 0x35; + mac.au8[5] = 0; + setMacAddress(mac); + + RTNETADDRIPV4 address; + address.u = RT_MAKE_U32_FROM_U8( 10, 0, 2, 2); // NB: big-endian + setIpv4Address(address); + + address.u = RT_H2N_U32_C(0xffffff00); + setIpv4Netmask(address); + + fDontLoadRulesOnStartup = false; + + for(unsigned int i = 0; i < RT_ELEMENTS(g_aGetOptDef); ++i) + addCommandLineOption(&g_aGetOptDef[i]); + + LogFlowFuncLeave(); +} + + +VBoxNetLwipNAT::~VBoxNetLwipNAT() +{ + if (m_ProxyOptions.tftp_root != NULL) + { + RTStrFree((char *)m_ProxyOptions.tftp_root); + } +} + + +int VBoxNetLwipNAT::natServicePfRegister(NATSEVICEPORTFORWARDRULE& natPf) +{ + int lrc; + + int sockFamily = (natPf.Pfr.fPfrIPv6 ? 
PF_INET6 : PF_INET); + int socketSpec; + switch(natPf.Pfr.iPfrProto) + { + case IPPROTO_TCP: + socketSpec = SOCK_STREAM; + break; + case IPPROTO_UDP: + socketSpec = SOCK_DGRAM; + break; + default: + return VERR_IGNORED; + } + + const char *pszHostAddr = natPf.Pfr.szPfrHostAddr; + if (pszHostAddr[0] == '\0') + { + if (sockFamily == PF_INET) + pszHostAddr = "0.0.0.0"; + else + pszHostAddr = "::"; + } + + lrc = fwspec_set(&natPf.FWSpec, + sockFamily, + socketSpec, + pszHostAddr, + natPf.Pfr.u16PfrHostPort, + natPf.Pfr.szPfrGuestAddr, + natPf.Pfr.u16PfrGuestPort); + if (lrc != 0) + return VERR_IGNORED; + + RTCMemAutoPtr<fwspec> pFwCopy; + if (RT_UNLIKELY(!pFwCopy.alloc())) + { + LogRel(("Unable to allocate memory for %s rule \"%s\"\n", + natPf.Pfr.fPfrIPv6 ? "IPv6" : "IPv4", + natPf.Pfr.szPfrName)); + return VERR_IGNORED; + } + + memcpy(pFwCopy.get(), &natPf.FWSpec, sizeof(natPf.FWSpec)); + + lrc = portfwd_rule_add(pFwCopy.get()); + if (lrc != 0) + return VERR_IGNORED; + + pFwCopy.release(); /* owned by lwip thread now */ + return VINF_SUCCESS; +} + + +int VBoxNetLwipNAT::natServiceProcessRegisteredPf(VECNATSERVICEPF& vecRules) +{ + ITERATORNATSERVICEPF it; + for (it = vecRules.begin(); it != vecRules.end(); ++it) + { + NATSEVICEPORTFORWARDRULE &natPf = *it; + + LogRel(("Loading %s port-forwarding rule \"%s\": %s %s%s%s:%d -> %s%s%s:%d\n", + natPf.Pfr.fPfrIPv6 ? "IPv6" : "IPv4", + natPf.Pfr.szPfrName, + natPf.Pfr.iPfrProto == IPPROTO_TCP ? "TCP" : "UDP", + /* from */ + natPf.Pfr.fPfrIPv6 ? "[" : "", + natPf.Pfr.szPfrHostAddr, + natPf.Pfr.fPfrIPv6 ? "]" : "", + natPf.Pfr.u16PfrHostPort, + /* to */ + natPf.Pfr.fPfrIPv6 ? "[" : "", + natPf.Pfr.szPfrGuestAddr, + natPf.Pfr.fPfrIPv6 ? 
"]" : "", + natPf.Pfr.u16PfrGuestPort)); + + natServicePfRegister(natPf); + } + + return VINF_SUCCESS; +} + + +/** This method executed on main thread, only at the end threr're one threads started explcitly (LWIP and later in ::run() + * RECV) + */ +int VBoxNetLwipNAT::init() +{ + LogFlowFuncEnter(); + + /* virtualbox initialized in super class */ + int rc = ::VBoxNetBaseService::init(); + AssertRCReturn(rc, rc); + + std::string networkName = getNetwork(); + rc = findNatNetwork(virtualbox, networkName, m_net); + AssertRCReturn(rc, rc); + + ComEventTypeArray aNetEvents; + aNetEvents.push_back(VBoxEventType_OnNATNetworkPortForward); + aNetEvents.push_back(VBoxEventType_OnNATNetworkSetting); + rc = createNatListener(m_listener, virtualbox, this, aNetEvents); + AssertRCReturn(rc, rc); + + + // resolver changes are reported on vbox but are retrieved from + // host so stash a pointer for future lookups + HRESULT hrc = virtualbox->COMGETTER(Host)(m_host.asOutParam()); + AssertComRCReturn(hrc, VERR_INTERNAL_ERROR); + + ComEventTypeArray aVBoxEvents; + aVBoxEvents.push_back(VBoxEventType_OnHostNameResolutionConfigurationChange); + rc = createNatListener(m_vboxListener, virtualbox, this, aVBoxEvents); + AssertRCReturn(rc, rc); + + BOOL fIPv6Enabled = FALSE; + hrc = m_net->COMGETTER(IPv6Enabled)(&fIPv6Enabled); + AssertComRCReturn(hrc, VERR_NOT_FOUND); + + BOOL fIPv6DefaultRoute = FALSE; + if (fIPv6Enabled) + { + hrc = m_net->COMGETTER(AdvertiseDefaultIPv6RouteEnabled)(&fIPv6DefaultRoute); + AssertComRCReturn(hrc, VERR_NOT_FOUND); + } + + m_ProxyOptions.ipv6_enabled = fIPv6Enabled; + m_ProxyOptions.ipv6_defroute = fIPv6DefaultRoute; + + + com::Bstr bstrSourceIp4Key = com::BstrFmt("NAT/%s/SourceIp4", networkName.c_str()); + com::Bstr bstrSourceIpX; + hrc = virtualbox->GetExtraData(bstrSourceIp4Key.raw(), bstrSourceIpX.asOutParam()); + if (SUCCEEDED(hrc)) + { + RTNETADDRIPV4 addr; + rc = RTNetStrToIPv4Addr(com::Utf8Str(bstrSourceIpX).c_str(), &addr); + if (RT_SUCCESS(rc)) + { + 
RT_ZERO(m_src4); + + m_src4.sin_addr.s_addr = addr.u; + m_ProxyOptions.src4 = &m_src4; + + bstrSourceIpX.setNull(); + } + } + + if (!fDontLoadRulesOnStartup) + { + fetchNatPortForwardRules(m_net, false, m_vecPortForwardRule4); + fetchNatPortForwardRules(m_net, true, m_vecPortForwardRule6); + } /* if (!fDontLoadRulesOnStartup) */ + + AddressToOffsetMapping tmp; + rc = localMappings(m_net, tmp); + if (RT_SUCCESS(rc) && tmp.size() != 0) + { + unsigned long i = 0; + for (AddressToOffsetMapping::iterator it = tmp.begin(); + it != tmp.end() && i < RT_ELEMENTS(m_lo2off); + ++it, ++i) + { + ip4_addr_set_u32(&m_lo2off[i].loaddr, it->first.u); + m_lo2off[i].off = it->second; + } + + m_loOptDescriptor.lomap = m_lo2off; + m_loOptDescriptor.num_lomap = i; + m_ProxyOptions.lomap_desc = &m_loOptDescriptor; + } + + com::Bstr bstr; + hrc = virtualbox->COMGETTER(HomeFolder)(bstr.asOutParam()); + AssertComRCReturn(hrc, VERR_NOT_FOUND); + if (!bstr.isEmpty()) + { + com::Utf8Str strTftpRoot(com::Utf8StrFmt("%ls%c%s", + bstr.raw(), RTPATH_DELIMITER, "TFTP")); + char *pszStrTemp; // avoid const char ** vs char ** + rc = RTStrUtf8ToCurrentCP(&pszStrTemp, strTftpRoot.c_str()); + AssertRC(rc); + m_ProxyOptions.tftp_root = pszStrTemp; + } + + m_ProxyOptions.nameservers = getHostNameservers(); + + /* end of COM initialization */ + + rc = g_pLwipNat->tryGoOnline(); + if (RT_FAILURE(rc)) + { + return rc; + } + + /* this starts LWIP thread */ + vboxLwipCoreInitialize(VBoxNetLwipNAT::onLwipTcpIpInit, this); + + LogFlowFuncLeaveRC(rc); + return rc; +} + + +const char **VBoxNetLwipNAT::getHostNameservers() +{ + HRESULT hrc; + + if (m_host.isNull()) + { + return NULL; + } + + com::SafeArray<BSTR> aNameServers; + hrc = m_host->COMGETTER(NameServers)(ComSafeArrayAsOutParam(aNameServers)); + if (FAILED(hrc)) + { + return NULL; + } + + const size_t cNameServers = aNameServers.size(); + if (cNameServers == 0) + { + return NULL; + } + + const char **ppcszNameServers = + (const char 
**)RTMemAllocZ(sizeof(char *) * (cNameServers + 1)); + if (ppcszNameServers == NULL) + { + return NULL; + } + + size_t idxLast = 0; + for (size_t i = 0; i < cNameServers; ++i) + { + com::Utf8Str strNameServer(aNameServers[i]); + ppcszNameServers[idxLast] = RTStrDup(strNameServer.c_str()); + if (ppcszNameServers[idxLast] != NULL) + { + ++idxLast; + } + } + + if (idxLast == 0) + { + RTMemFree(ppcszNameServers); + return NULL; + } + + return ppcszNameServers; +} + + +int VBoxNetLwipNAT::parseOpt(int rc, const RTGETOPTUNION& Val) +{ + switch (rc) + { + case 'p': + case 'P': + { + NATSEVICEPORTFORWARDRULE Rule; + VECNATSERVICEPF& rules = (rc == 'P'? + m_vecPortForwardRule6 + : m_vecPortForwardRule4); + + fDontLoadRulesOnStartup = true; + + RT_ZERO(Rule); + + int irc = netPfStrToPf(Val.psz, (rc == 'P'), &Rule.Pfr); + rules.push_back(Rule); + return VINF_SUCCESS; + } + default:; + } + return VERR_NOT_FOUND; +} + + +int VBoxNetLwipNAT::processFrame(void *pvFrame, size_t cbFrame) +{ + AssertPtrReturn(pvFrame, VERR_INVALID_PARAMETER); + AssertReturn(cbFrame != 0, VERR_INVALID_PARAMETER); + + struct pbuf *p = pbuf_alloc(PBUF_RAW, cbFrame + ETH_PAD_SIZE, PBUF_POOL); + if (RT_UNLIKELY(p == NULL)) + { + return VERR_NO_MEMORY; + } + + /* + * The code below is inlined version of: + * + * pbuf_header(p, -ETH_PAD_SIZE); // hide padding + * pbuf_take(p, pvFrame, cbFrame); + * pbuf_header(p, ETH_PAD_SIZE); // reveal padding + */ + struct pbuf *q = p; + uint8_t *pu8Chunk = (uint8_t *)pvFrame; + do { + uint8_t *payload = (uint8_t *)q->payload; + size_t len = q->len; + +#if ETH_PAD_SIZE + if (RT_LIKELY(q == p)) // single pbuf is large enough + { + payload += ETH_PAD_SIZE; + len -= ETH_PAD_SIZE; + } +#endif + memcpy(payload, pu8Chunk, len); + pu8Chunk += len; + q = q->next; + } while (RT_UNLIKELY(q != NULL)); + + m_LwipNetIf.input(p, &m_LwipNetIf); + return VINF_SUCCESS; +} + + +int VBoxNetLwipNAT::processGSO(PCPDMNETWORKGSO pGso, size_t cbFrame) +{ + if (!PDMNetGsoIsValid(pGso, cbFrame, 
+ cbFrame - sizeof(PDMNETWORKGSO))) + return VERR_INVALID_PARAMETER; + + cbFrame -= sizeof(PDMNETWORKGSO); + uint8_t abHdrScratch[256]; + uint32_t const cSegs = PDMNetGsoCalcSegmentCount(pGso, + cbFrame); + for (size_t iSeg = 0; iSeg < cSegs; iSeg++) + { + uint32_t cbSegFrame; + void *pvSegFrame = + PDMNetGsoCarveSegmentQD(pGso, + (uint8_t *)(pGso + 1), + cbFrame, + abHdrScratch, + iSeg, + cSegs, + &cbSegFrame); + + int rc = processFrame(pvSegFrame, cbSegFrame); + if (RT_FAILURE(rc)) + { + return rc; + } + } + + return VINF_SUCCESS; +} + + +int VBoxNetLwipNAT::run() +{ + /* Father starts receiving thread and enter event loop. */ + VBoxNetBaseService::run(); + + vboxLwipCoreFinalize(VBoxNetLwipNAT::onLwipTcpIpFini, this); + + m_vecPortForwardRule4.clear(); + m_vecPortForwardRule6.clear(); + + return VINF_SUCCESS; +} + + +/** + * Entry point. + */ +extern "C" DECLEXPORT(int) TrustedMain(int argc, char **argv, char **envp) +{ + int rc; + + LogFlowFuncEnter(); + + NOREF(envp); + +#ifdef RT_OS_WINDOWS + WSADATA wsaData; + int err; + + err = WSAStartup(MAKEWORD(2,2), &wsaData); + if (err) + { + fprintf(stderr, "wsastartup: failed (%d)\n", err); + return 1; + } +#endif + + SOCKET icmpsock4 = INVALID_SOCKET; + SOCKET icmpsock6 = INVALID_SOCKET; +#ifndef RT_OS_DARWIN + const int icmpstype = SOCK_RAW; +#else + /* on OS X it's not privileged */ + const int icmpstype = SOCK_DGRAM; +#endif + + icmpsock4 = socket(AF_INET, icmpstype, IPPROTO_ICMP); + if (icmpsock4 == INVALID_SOCKET) + { + perror("IPPROTO_ICMP"); +#ifdef VBOX_RAWSOCK_DEBUG_HELPER + icmpsock4 = getrawsock(AF_INET); +#endif + } + + if (icmpsock4 != INVALID_SOCKET) + { +#ifdef ICMP_FILTER // Linux specific + struct icmp_filter flt = { + ~(uint32_t)( + (1U << ICMP_ECHOREPLY) + | (1U << ICMP_DEST_UNREACH) + | (1U << ICMP_TIME_EXCEEDED) + ) + }; + + int status = setsockopt(icmpsock4, SOL_RAW, ICMP_FILTER, + &flt, sizeof(flt)); + if (status < 0) + { + perror("ICMP_FILTER"); + } +#endif + } + + icmpsock6 = 
socket(AF_INET6, icmpstype, IPPROTO_ICMPV6); + if (icmpsock6 == INVALID_SOCKET) + { + perror("IPPROTO_ICMPV6"); +#ifdef VBOX_RAWSOCK_DEBUG_HELPER + icmpsock6 = getrawsock(AF_INET6); +#endif + } + + if (icmpsock6 != INVALID_SOCKET) + { +#ifdef ICMP6_FILTER // Windows doesn't support RFC 3542 API + /* + * XXX: We do this here for now, not in pxping.c, to avoid + * name clashes between lwIP and system headers. + */ + struct icmp6_filter flt; + ICMP6_FILTER_SETBLOCKALL(&flt); + + ICMP6_FILTER_SETPASS(ICMP6_ECHO_REPLY, &flt); + + ICMP6_FILTER_SETPASS(ICMP6_DST_UNREACH, &flt); + ICMP6_FILTER_SETPASS(ICMP6_PACKET_TOO_BIG, &flt); + ICMP6_FILTER_SETPASS(ICMP6_TIME_EXCEEDED, &flt); + ICMP6_FILTER_SETPASS(ICMP6_PARAM_PROB, &flt); + + int status = setsockopt(icmpsock6, IPPROTO_ICMPV6, ICMP6_FILTER, + &flt, sizeof(flt)); + if (status < 0) + { + perror("ICMP6_FILTER"); + } +#endif + } + + HRESULT hrc = com::Initialize(); + if (FAILED(hrc)) + { +#ifdef VBOX_WITH_XPCOM + if (hrc == NS_ERROR_FILE_ACCESS_DENIED) + { + char szHome[RTPATH_MAX] = ""; + int vrc = com::GetVBoxUserHomeDirectory(szHome, sizeof(szHome), false); + if (RT_SUCCESS(vrc)) + { + return RTMsgErrorExit(RTEXITCODE_FAILURE, + "Failed to initialize COM: %s: %Rhrf", + szHome, hrc); + } + } +#endif // VBOX_WITH_XPCOM + return RTMsgErrorExit(RTEXITCODE_FAILURE, + "Failed to initialize COM: %Rhrf", hrc); + } + + rc = vboxNetNATLogInit(argc, argv); + // shall we bail if we failed to init logging? + + g_pLwipNat = new VBoxNetLwipNAT(icmpsock4, icmpsock6); + + Log2(("NAT: initialization\n")); + rc = g_pLwipNat->parseArgs(argc - 1, argv + 1); + rc = (rc == 0) ? 
VINF_SUCCESS : VERR_GENERAL_FAILURE; /* XXX: FIXME */ + + if (RT_SUCCESS(rc)) + { + rc = g_pLwipNat->init(); + } + + if (RT_SUCCESS(rc)) + { + g_pLwipNat->run(); + } + + delete g_pLwipNat; + return 0; +} + + +static int vboxNetNATLogInit(int argc, char **argv) +{ + size_t cch; + int rc; + + char szHome[RTPATH_MAX]; + rc = com::GetVBoxUserHomeDirectory(szHome, sizeof(szHome), false); + if (RT_FAILURE(rc)) + return rc; + + const char *pcszNetwork = NULL; + + // XXX: This duplicates information from VBoxNetBaseService.cpp. + // Perhaps option definitions should be exported as public static + // member of VBoxNetBaseService? + static const RTGETOPTDEF s_aOptions[] = { + { "--network", 'n', RTGETOPT_REQ_STRING } + }; + + RTGETOPTSTATE GetState; + RTGetOptInit(&GetState, argc, argv, s_aOptions, RT_ELEMENTS(s_aOptions), 1, + RTGETOPTINIT_FLAGS_NO_STD_OPTS); + + RTGETOPTUNION ValueUnion; + int ch; + while ((ch = RTGetOpt(&GetState, &ValueUnion))) + { + if (ch == 'n') + { + pcszNetwork = ValueUnion.psz; + break; + } + } + + if (pcszNetwork == NULL) + { + return VERR_MISSING; + } + + char szNetwork[RTPATH_MAX]; + rc = RTStrCopy(szNetwork, sizeof(szNetwork), pcszNetwork); + if (RT_FAILURE(rc)) + { + return rc; + } + + // sanitize network name to be usable as a path component + for (char *p = szNetwork; *p != '\0'; ++p) + { + if (RTPATH_IS_SEP(*p)) + { + *p = '_'; + } + } + + char szLogFile[RTPATH_MAX]; + cch = RTStrPrintf(szLogFile, sizeof(szLogFile), + "%s%c%s.log", szHome, RTPATH_DELIMITER, szNetwork); + if (cch >= sizeof(szLogFile)) + { + return VERR_BUFFER_OVERFLOW; + } + + // sanitize network name some more to be usable as environment variable + for (char *p = szNetwork; *p != '\0'; ++p) + { + if (*p != '_' + && (*p < '0' || '9' < *p) + && (*p < 'a' || 'z' < *p) + && (*p < 'A' || 'Z' < *p)) + { + *p = '_'; + } + } + + char szEnvVarBase[128]; + cch = RTStrPrintf(szEnvVarBase, sizeof(szEnvVarBase), + "VBOXNET_%s_RELEASE_LOG", szNetwork); + if (cch >= sizeof(szEnvVarBase)) 
+ { + return VERR_BUFFER_OVERFLOW; + } + + char szError[RTPATH_MAX + 128]; + rc = com::VBoxLogRelCreate("NAT Network", + szLogFile, + RTLOGFLAGS_PREFIX_TIME_PROG, + "all all.restrict -default.restrict", + szEnvVarBase, + RTLOGDEST_FILE, + 32768 /* cMaxEntriesPerGroup */, + 0 /* cHistory */, + 0 /* uHistoryFileTime */, + 0 /* uHistoryFileSize */, + szError, sizeof(szError)); + return rc; +} + + +static int fetchNatPortForwardRules(const ComNatPtr& nat, bool fIsIPv6, VECNATSERVICEPF& vec) +{ + HRESULT hrc; + com::SafeArray<BSTR> rules; + if (fIsIPv6) + hrc = nat->COMGETTER(PortForwardRules6)(ComSafeArrayAsOutParam(rules)); + else + hrc = nat->COMGETTER(PortForwardRules4)(ComSafeArrayAsOutParam(rules)); + AssertReturn(SUCCEEDED(hrc), VERR_INTERNAL_ERROR); + + NATSEVICEPORTFORWARDRULE Rule; + for (size_t idxRules = 0; idxRules < rules.size(); ++idxRules) + { + Log(("%d-%s rule: %ls\n", idxRules, (fIsIPv6 ? "IPv6" : "IPv4"), rules[idxRules])); + RT_ZERO(Rule); + + int rc = netPfStrToPf(com::Utf8Str(rules[idxRules]).c_str(), + fIsIPv6, &Rule.Pfr); + if (RT_FAILURE(rc)) + continue; + + vec.push_back(Rule); + } + + return VINF_SUCCESS; +} + + +#ifndef VBOX_WITH_HARDENING + +int main(int argc, char **argv, char **envp) +{ + int rc = RTR3InitExe(argc, &argv, RTR3INIT_FLAGS_SUPLIB); + if (RT_FAILURE(rc)) + return RTMsgInitFailure(rc); + + return TrustedMain(argc, argv, envp); +} + +# if defined(RT_OS_WINDOWS) + +static LRESULT CALLBACK WindowProc(HWND hwnd, + UINT uMsg, + WPARAM wParam, + LPARAM lParam +) +{ + if(uMsg == WM_DESTROY) + { + PostQuitMessage(0); + return 0; + } + return DefWindowProc (hwnd, uMsg, wParam, lParam); +} + +static LPCWSTR g_WndClassName = L"VBoxNetNatLwipClass"; + +static DWORD WINAPI MsgThreadProc(__in LPVOID lpParameter) +{ + HWND hwnd = 0; + HINSTANCE hInstance = (HINSTANCE)GetModuleHandle (NULL); + bool bExit = false; + + /* Register the Window Class. 
*/ + WNDCLASS wc; + wc.style = 0; + wc.lpfnWndProc = WindowProc; + wc.cbClsExtra = 0; + wc.cbWndExtra = sizeof(void *); + wc.hInstance = hInstance; + wc.hIcon = NULL; + wc.hCursor = NULL; + wc.hbrBackground = (HBRUSH)(COLOR_BACKGROUND + 1); + wc.lpszMenuName = NULL; + wc.lpszClassName = g_WndClassName; + + ATOM atomWindowClass = RegisterClass(&wc); + + if (atomWindowClass != 0) + { + /* Create the window. */ + hwnd = CreateWindowEx (WS_EX_TOOLWINDOW | WS_EX_TRANSPARENT | WS_EX_TOPMOST, + g_WndClassName, g_WndClassName, + WS_POPUPWINDOW, + -200, -200, 100, 100, NULL, NULL, hInstance, NULL); + + if (hwnd) + { + SetWindowPos(hwnd, HWND_TOPMOST, -200, -200, 0, 0, + SWP_NOACTIVATE | SWP_HIDEWINDOW | SWP_NOCOPYBITS | SWP_NOREDRAW | SWP_NOSIZE); + + MSG msg; + while (GetMessage(&msg, NULL, 0, 0)) + { + TranslateMessage(&msg); + DispatchMessage(&msg); + } + + DestroyWindow (hwnd); + + bExit = true; + } + + UnregisterClass (g_WndClassName, hInstance); + } + + if(bExit) + { + /* no need any accuracy here, in anyway the DHCP server usually gets terminated with TerminateProcess */ + exit(0); + } + + return 0; +} + + + +/** (We don't want a console usually.) 
*/ +int WINAPI WinMain(HINSTANCE hInstance, HINSTANCE hPrevInstance, LPSTR lpCmdLine, int nCmdShow) +{ +#if 0 + NOREF(hInstance); NOREF(hPrevInstance); NOREF(lpCmdLine); NOREF(nCmdShow); + + HANDLE hThread = CreateThread( + NULL, /*__in_opt LPSECURITY_ATTRIBUTES lpThreadAttributes, */ + 0, /*__in SIZE_T dwStackSize, */ + MsgThreadProc, /*__in LPTHREAD_START_ROUTINE lpStartAddress,*/ + NULL, /*__in_opt LPVOID lpParameter,*/ + 0, /*__in DWORD dwCreationFlags,*/ + NULL /*__out_opt LPDWORD lpThreadId*/ + ); + + if(hThread != NULL) + CloseHandle(hThread); + +#endif + return main(__argc, __argv, environ); +} +# endif /* RT_OS_WINDOWS */ + +#endif /* !VBOX_WITH_HARDENING */ diff --git a/src/VBox/NetworkServices/NAT/VBoxNetNAT.cpp b/src/VBox/NetworkServices/NAT/VBoxNetNAT.cpp deleted file mode 100644 index d031777c..00000000 --- a/src/VBox/NetworkServices/NAT/VBoxNetNAT.cpp +++ /dev/null @@ -1,815 +0,0 @@ -/* $Id: VBoxNetNAT.cpp $ */ -/** @file - * VBoxNetNAT - NAT Service for connecting to IntNet. - */ - -/* - * Copyright (C) 2009 Oracle Corporation - * - * This file is part of VirtualBox Open Source Edition (OSE), as - * available from http://www.virtualbox.org. This file is free software; - * you can redistribute it and/or modify it under the terms of the GNU - * General Public License (GPL) as published by the Free Software - * Foundation, in version 2 as it comes in the "COPYING" file of the - * VirtualBox OSE distribution. VirtualBox OSE is distributed in the - * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind. - */ - -/** @page pg_net_nat VBoxNetNAT - * - * Write a few words... 
- * - */ - -/******************************************************************************* -* Header Files * -*******************************************************************************/ -#include <iprt/net.h> -#include <iprt/initterm.h> -#include <iprt/alloca.h> -#include <iprt/err.h> -#include <iprt/time.h> -#include <iprt/timer.h> -#include <iprt/thread.h> -#include <iprt/stream.h> -#include <iprt/path.h> -#include <iprt/param.h> -#include <iprt/pipe.h> -#include <iprt/getopt.h> -#include <iprt/string.h> -#include <iprt/mem.h> -#include <iprt/message.h> -#include <iprt/req.h> -#include <iprt/file.h> -#include <iprt/semaphore.h> -#define LOG_GROUP LOG_GROUP_NAT_SERVICE -#include <VBox/log.h> - -#include <VBox/sup.h> -#include <VBox/intnet.h> -#include <VBox/intnetinline.h> -#include <VBox/vmm/pdmnetinline.h> -#include <VBox/vmm/vmm.h> -#include <VBox/version.h> - -#include <vector> -#include <string> - -#include "../NetLib/VBoxNetLib.h" -#include "../NetLib/VBoxNetBaseService.h" -#include <libslirp.h> - -#ifdef RT_OS_WINDOWS /* WinMain */ -# include <Windows.h> -# include <stdlib.h> -#else -# include <errno.h> -#endif - - - -/******************************************************************************* -* Structures and Typedefs * -*******************************************************************************/ -static RTGETOPTDEF g_aGetOptDef[] = -{ - { "--pf", 'p', RTGETOPT_REQ_STRING } -}; - -typedef struct NATSEVICEPORTFORWARDRULE -{ - char *pszPortForwardRuleName; - struct in_addr IpV4HostAddr; - uint16_t u16HostPort; - struct in_addr IpV4GuestAddr; - uint16_t u16GuestPort; - bool fUdp; - char *pszStrRaw; -} NATSEVICEPORTFORWARDRULE, *PNATSEVICEPORTFORWARDRULE; - -class VBoxNetNAT : public VBoxNetBaseService -{ -public: - VBoxNetNAT(); - virtual ~VBoxNetNAT(); - void usage(void); - void run(void); - virtual int init(void); - virtual int parseOpt(int rc, const RTGETOPTUNION& getOptVal); - -public: - PNATState m_pNATState; - RTNETADDRIPV4 
m_Ipv4Netmask; - bool m_fPassDomain; - RTTHREAD m_ThrNAT; - RTTHREAD m_ThrSndNAT; - RTTHREAD m_ThrUrgSndNAT; -#ifdef RT_OS_WINDOWS - HANDLE m_hWakeupEvent; -#else - RTPIPE m_hPipeWrite; - RTPIPE m_hPipeRead; -#endif - /** Queue for NAT-thread-external events. */ - /** event to wakeup the guest receive thread */ - RTSEMEVENT m_EventSend; - /** event to wakeup the guest urgent receive thread */ - RTSEMEVENT m_EventUrgSend; - - RTREQQUEUE m_hReqQueue; - RTREQQUEUE m_hSendQueue; - RTREQQUEUE m_hUrgSendQueue; - volatile uint32_t cUrgPkt; - volatile uint32_t cPkt; - bool fIsRunning; - std::vector<PNATSEVICEPORTFORWARDRULE> m_vecPortForwardRuleFromCmdLine; -}; - - - -/******************************************************************************* -* Global Variables * -*******************************************************************************/ -/** Pointer to the NAT server. */ -class VBoxNetNAT *g_pNAT; -static DECLCALLBACK(int) AsyncIoThread(RTTHREAD pThread, void *pvUser); -static DECLCALLBACK(int) natSndThread(RTTHREAD pThread, void *pvUser); -static DECLCALLBACK(int) natUrgSndThread(RTTHREAD pThread, void *pvUser); -static void SendWorker(struct mbuf *m, size_t cb); -static void IntNetSendWorker(bool urg, void *pvFrame, size_t cbFrame, struct mbuf *m); - - -static void natNotifyNATThread(void) -{ - int rc; -#ifndef RT_OS_WINDOWS - /* kick select() */ - size_t cbIgnored; - rc = RTPipeWrite(g_pNAT->m_hPipeWrite, "", 1, &cbIgnored); -#else - /* kick WSAWaitForMultipleEvents */ - rc = WSASetEvent(g_pNAT->hWakeupEvent); -#endif - AssertRC(rc); -} - -VBoxNetNAT::VBoxNetNAT() -{ -#if defined(RT_OS_WINDOWS) - /*@todo check if we can remove this*/ - VBoxNetBaseService(); -#endif - m_enmTrunkType = kIntNetTrunkType_WhateverNone; - m_TrunkName = ""; - m_MacAddress.au8[0] = 0x08; - m_MacAddress.au8[1] = 0x00; - m_MacAddress.au8[2] = 0x27; - m_MacAddress.au8[3] = 0x40; - m_MacAddress.au8[4] = 0x41; - m_MacAddress.au8[5] = 0x42; - m_Ipv4Address.u = 
RT_H2N_U32_C(RT_BSWAP_U32_C(RT_MAKE_U32_FROM_U8( 10, 0, 2, 2))); - m_Ipv4Netmask.u = RT_H2N_U32_C(0xffffff); - cPkt = 0; - cUrgPkt = 0; - VBoxNetBaseService::init(); - for(unsigned int i = 0; i < RT_ELEMENTS(g_aGetOptDef); ++i) - m_vecOptionDefs.push_back(&g_aGetOptDef[i]); -} - -VBoxNetNAT::~VBoxNetNAT() { } -int VBoxNetNAT::init() -{ - int rc; -#if 0 - using namespace com; - HRESULT hrc = com::Initialize(); - if (FAILED(hrc)) - return RTMsgErrorExit(RTEXITCODE_FAILURE, "Failed to initialize COM!"); -#endif - - /* - * Initialize slirp. - */ - rc = slirp_init(&m_pNATState, RT_H2N_U32_C(RT_BSWAP_U32_C(RT_MAKE_U32_FROM_U8( 10, 0, 2, 0))), m_Ipv4Netmask.u, m_fPassDomain, false, 0x40, 100, this); - AssertReleaseRC(rc); - - /* Why ? */ - slirp_set_ethaddr_and_activate_port_forwarding(m_pNATState, &m_MacAddress.au8[0], INADDR_ANY); -#if 0 - in_addr ipv4HostAddr; - in_addr ipv4GuestAddr; - ipv4GuestAddr.s_addr = RT_H2N_U32_C(RT_BSWAP_U32_C(RT_MAKE_U32_FROM_U8( 10, 0, 2, 15))); - ipv4HostAddr.s_addr = INADDR_ANY; - slirp_add_redirect(m_pNATState, false, ipv4HostAddr, 2022, ipv4GuestAddr , 22, NULL); -#endif - std::vector<PNATSEVICEPORTFORWARDRULE>::iterator it; - for (it = m_vecPortForwardRuleFromCmdLine.begin(); it != m_vecPortForwardRuleFromCmdLine.end(); ++it) - { - slirp_add_redirect(m_pNATState, (*it)->fUdp, (*it)->IpV4HostAddr, (*it)->u16HostPort, (*it)->IpV4GuestAddr , (*it)->u16GuestPort, NULL); - RTStrFree((*it)->pszStrRaw); - RTMemFree((*it)); - } - m_vecPortForwardRuleFromCmdLine.clear(); -#ifndef RT_OS_WINDOWS - /* - * Create the control pipe. 
- */ - rc = RTPipeCreate(&m_hPipeRead, &m_hPipeWrite, 0 /*fFlags*/); - AssertReleaseRC(rc); -#else - m_hWakeupEvent = CreateEvent(NULL, FALSE, FALSE, NULL); /* auto-reset event */ - AssertReleaseRC(m_hWakeupEvent != NULL); - slirp_register_external_event(m_pNATState, m_hWakeupEvent, VBOX_WAKEUP_EVENT_INDEX); -#endif - rc = RTReqQueueCreate(&m_hReqQueue); - AssertReleaseRC(rc); - - rc = RTReqQueueCreate(&m_hSendQueue); - AssertReleaseRC(rc); - - rc = RTReqQueueCreate(&m_hUrgSendQueue); - AssertReleaseRC(rc); - - g_pNAT->fIsRunning = true; - rc = RTThreadCreate(&m_ThrNAT, AsyncIoThread, this, 128 * _1K, RTTHREADTYPE_DEFAULT, 0, "NAT"); - rc = RTThreadCreate(&m_ThrSndNAT, natSndThread, this, 128 * _1K, RTTHREADTYPE_DEFAULT, 0, "SndNAT"); - rc = RTThreadCreate(&m_ThrUrgSndNAT, natUrgSndThread, this, 128 * _1K, RTTHREADTYPE_DEFAULT, 0, "UrgSndNAT"); - rc = RTSemEventCreate(&m_EventSend); - rc = RTSemEventCreate(&m_EventUrgSend); - AssertReleaseRC(rc); - return VINF_SUCCESS; -} - -/* Mandatory functions */ -void VBoxNetNAT::run() -{ - - /* - * The loop. - */ - fIsRunning = true; - PINTNETRINGBUF pRingBuf = &m_pIfBuf->Recv; - RTThreadSetType(RTThreadSelf(), RTTHREADTYPE_IO); - for (;;) - { - /* - * Wait for a packet to become available. - */ - INTNETIFWAITREQ WaitReq; - WaitReq.Hdr.u32Magic = SUPVMMR0REQHDR_MAGIC; - WaitReq.Hdr.cbReq = sizeof(WaitReq); - WaitReq.pSession = m_pSession; - WaitReq.hIf = m_hIf; - WaitReq.cMillies = 2000; /* 2 secs - the sleep is for some reason uninterruptible... */ /** @todo fix interruptability in SrvIntNet! */ -#if 0 - RTReqProcess(m_hSendQueue, 0); - RTReqProcess(m_hUrgSendQueue, 0); -#endif - int rc = SUPR3CallVMMR0Ex(NIL_RTR0PTR, NIL_VMCPUID, VMMR0_DO_INTNET_IF_WAIT, 0, &WaitReq.Hdr); - if (RT_FAILURE(rc)) - { - if (rc == VERR_TIMEOUT || rc == VERR_INTERRUPTED) - { - natNotifyNATThread(); - continue; - } - LogRel(("VBoxNetNAT: VMMR0_DO_INTNET_IF_WAIT returned %Rrc\n", rc)); - return; - } - - /* - * Process the receive buffer. 
- */ - PCINTNETHDR pHdr; - while ((pHdr = IntNetRingGetNextFrameToRead(pRingBuf)) != NULL) - { - uint16_t const u16Type = pHdr->u16Type; - size_t cbFrame = pHdr->cbFrame; - size_t cbIgnored; - void *pvSlirpFrame; - struct mbuf *m; - switch (u16Type) - { - case INTNETHDR_TYPE_FRAME: - m = slirp_ext_m_get(g_pNAT->m_pNATState, cbFrame, &pvSlirpFrame, &cbIgnored); - if (!m) - { - LogRel(("NAT: Can't allocate send buffer cbFrame=%u\n", cbFrame)); - break; - } - memcpy(pvSlirpFrame, IntNetHdrGetFramePtr(pHdr, m_pIfBuf), cbFrame); -#if 0 - IntNetRingSkipFrame(&m_pIfBuf->Recv); -#endif - - /* don't wait, we may have to wakeup the NAT thread first */ - rc = RTReqQueueCallEx(m_hReqQueue, NULL /*ppReq*/, 0 /*cMillies*/, RTREQFLAGS_VOID | RTREQFLAGS_NO_WAIT, - (PFNRT)SendWorker, 2, m, cbFrame); - natNotifyNATThread(); - AssertReleaseRC(rc); - break; - case INTNETHDR_TYPE_GSO: -#if 1 - { - /** @todo pass these unmodified. */ - PCPDMNETWORKGSO pGso = IntNetHdrGetGsoContext(pHdr, m_pIfBuf); - if (!PDMNetGsoIsValid(pGso, cbFrame, cbFrame - sizeof(*pGso))) - { - IntNetRingSkipFrame(&m_pIfBuf->Recv); - STAM_REL_COUNTER_INC(&m_pIfBuf->cStatBadFrames); - continue; - } - - uint8_t abHdrScratch[256]; - cbFrame -= sizeof(PDMNETWORKGSO); - uint32_t const cSegs = PDMNetGsoCalcSegmentCount(pGso, cbFrame); - for (uint32_t iSeg = 0; iSeg < cSegs; iSeg++) - { - uint32_t cbSegFrame; - void *pvSegFrame = PDMNetGsoCarveSegmentQD(pGso, (uint8_t *)(pGso + 1), cbFrame, abHdrScratch, - iSeg, cSegs, &cbSegFrame); - m = slirp_ext_m_get(g_pNAT->m_pNATState, cbSegFrame, &pvSlirpFrame, &cbIgnored); - if (!m) - { - LogRel(("NAT: Can't allocate send buffer cbSegFrame=%u seg=%u/%u\n", - cbSegFrame, iSeg, cSegs)); - break; - } - memcpy(pvSlirpFrame, pvSegFrame, cbSegFrame); - - rc = RTReqQueueCallEx(m_hReqQueue, NULL /*ppReq*/, 0 /*cMillies*/, - RTREQFLAGS_VOID | RTREQFLAGS_NO_WAIT, - (PFNRT)SendWorker, 2, m, cbSegFrame); - natNotifyNATThread(); - AssertReleaseRC(rc); - } - } - break; -#endif - case 
INTNETHDR_TYPE_PADDING: - break; - default: - STAM_REL_COUNTER_INC(&m_pIfBuf->cStatBadFrames); - break; - } - - IntNetRingSkipFrame(&m_pIfBuf->Recv); - } - } - fIsRunning = false; -} - -void VBoxNetNAT::usage() -{ -} - -int VBoxNetNAT::parseOpt(int rc, const RTGETOPTUNION& Val) -{ - switch (rc) - { - case 'p': - { -#define ITERATE_TO_NEXT_TERM(ch, pRule, strRaw) \ - do { \ - while (*ch != ',') \ - { \ - if (*ch == 0) \ - { \ - if (pRule) \ - RTMemFree(pRule); \ - if(strRaw) \ - RTStrFree(strRaw); \ - return VERR_INVALID_PARAMETER; \ - } \ - ch++; \ - } \ - *ch = '\0'; \ - ch++; \ - } while(0) - PNATSEVICEPORTFORWARDRULE pRule = (PNATSEVICEPORTFORWARDRULE)RTMemAlloc(sizeof(NATSEVICEPORTFORWARDRULE)); - if (!pRule) - return VERR_NO_MEMORY; - char *strName; - char *strProto; - char *strHostIp; - char *strHostPort; - char *strGuestIp; - char *strGuestPort; - char *strRaw = RTStrDup(Val.psz); - char *ch = strRaw; - if (!strRaw) - { - RTMemFree(pRule); - return VERR_NO_MEMORY; - } - - strName = RTStrStrip(ch); - ITERATE_TO_NEXT_TERM(ch, pRule, strRaw); - strProto = RTStrStrip(ch); - ITERATE_TO_NEXT_TERM(ch, pRule, strRaw); - strHostIp = RTStrStrip(ch); - ITERATE_TO_NEXT_TERM(ch, pRule, strRaw); - strHostPort = RTStrStrip(ch); - ITERATE_TO_NEXT_TERM(ch, pRule, strRaw); - strGuestIp = RTStrStrip(ch); - ITERATE_TO_NEXT_TERM(ch, pRule, strRaw); - strGuestPort = RTStrStrip(ch); - if (RTStrICmp(strProto, "udp") == 0) - pRule->fUdp = true; - else if (RTStrICmp(strProto, "tcp") == 0) - pRule->fUdp = false; - else - { - RTStrFree(strRaw); - RTMemFree(pRule); - return VERR_INVALID_PARAMETER; - } - if ( strHostIp == NULL - || inet_aton(strHostIp, &pRule->IpV4HostAddr) == 0) - pRule->IpV4HostAddr.s_addr = INADDR_ANY; - if ( strGuestIp == NULL - || inet_aton(strGuestIp, &pRule->IpV4GuestAddr) == 0) - { - RTMemFree(pRule); - RTMemFree(strRaw); - return VERR_INVALID_PARAMETER; - } - pRule->u16HostPort = RTStrToUInt16(strHostPort); - pRule->u16GuestPort = RTStrToUInt16(strGuestPort); - 
if ( !pRule->u16HostPort - || !pRule->u16GuestPort) - { - RTMemFree(pRule); - RTMemFree(strRaw); - return VERR_INVALID_PARAMETER; - } - pRule->pszStrRaw = strRaw; - m_vecPortForwardRuleFromCmdLine.push_back(pRule); - return VINF_SUCCESS; -#undef ITERATE_TO_NEXT_TERM - } - default:; - } - return VERR_NOT_FOUND; -} - -/** - * Entry point. - */ -extern "C" DECLEXPORT(int) TrustedMain(int argc, char **argv, char **envp) -{ - Log2(("NAT: main\n")); - g_pNAT = new VBoxNetNAT(); - Log2(("NAT: initialization\n")); - int rc = g_pNAT->parseArgs(argc - 1, argv + 1); - if (!rc) - { - g_pNAT->init(); - Log2(("NAT: parsing command line\n")); - Log2(("NAT: try go online\n")); - g_pNAT->tryGoOnline(); - Log2(("NAT: main loop\n")); - g_pNAT->run(); - } - delete g_pNAT; - return 0; -} - - -/** slirp's hooks */ -extern "C" int slirp_can_output(void * pvUser) -{ - return 1; -} - -extern "C" void slirp_urg_output(void *pvUser, struct mbuf *m, const uint8_t *pu8Buf, int cb) -{ - LogFlowFunc(("ENTER: m:%p, pu8Buf:%p, cb:%d\n", m, pu8Buf, cb)); - int rc = RTReqQueueCallEx(g_pNAT->m_hUrgSendQueue, NULL /*ppReq*/, 0 /*cMillies*/, RTREQFLAGS_VOID | RTREQFLAGS_NO_WAIT, - (PFNRT)IntNetSendWorker, 4, (uintptr_t)1, (uintptr_t)pu8Buf, (uintptr_t)cb, (uintptr_t)m); - ASMAtomicIncU32(&g_pNAT->cUrgPkt); - RTSemEventSignal(g_pNAT->m_EventUrgSend); - AssertReleaseRC(rc); - LogFlowFuncLeave(); -} -extern "C" void slirp_output(void *pvUser, struct mbuf *m, const uint8_t *pu8Buf, int cb) -{ - LogFlowFunc(("ENTER: m:%p, pu8Buf:%p, cb:%d\n", m, pu8Buf, cb)); - AssertRelease(g_pNAT == pvUser); - int rc = RTReqQueueCallEx(g_pNAT->m_hSendQueue, NULL /*ppReq*/, 0 /*cMillies*/, RTREQFLAGS_VOID | RTREQFLAGS_NO_WAIT, - (PFNRT)IntNetSendWorker, 4, (uintptr_t)0, (uintptr_t)pu8Buf, (uintptr_t)cb, (uintptr_t)m); - ASMAtomicIncU32(&g_pNAT->cPkt); - RTSemEventSignal(g_pNAT->m_EventSend); - AssertReleaseRC(rc); - LogFlowFuncLeave(); -} - -extern "C" void slirp_output_pending(void *pvUser) -{ - 
AssertMsgFailed(("Unimplemented")); -} - -/** - * Worker function for drvNATSend(). - * @thread "NAT" thread. - */ -static void SendWorker(struct mbuf *m, size_t cb) -{ - LogFlowFunc(("ENTER: m:%p ,cb:%d\n", m, cb)); - slirp_input(g_pNAT->m_pNATState, m, cb); - LogFlowFuncLeave(); -} - -static void IntNetSendWorker(bool fUrg, void *pvFrame, size_t cbFrame, struct mbuf *m) -{ - VBoxNetNAT *pThis = g_pNAT; - INTNETIFSENDREQ SendReq; - int rc; - - LogFlowFunc(("ENTER: urg:%RTbool ,pvFrame:%p, cbFrame:%d, m:%p\n", fUrg, pvFrame, cbFrame, m)); - if (!fUrg) - { - /* non-urgent datagramm sender */ - while ( ASMAtomicReadU32(&g_pNAT->cUrgPkt) != 0 - || ASMAtomicReadU32(&g_pNAT->cPkt) == 0) - rc = RTSemEventWait(g_pNAT->m_EventSend, RT_INDEFINITE_WAIT); - } - else - { - while (ASMAtomicReadU32(&g_pNAT->cUrgPkt) == 0) - rc = RTSemEventWait(g_pNAT->m_EventUrgSend, RT_INDEFINITE_WAIT); - } - rc = IntNetRingWriteFrame(&pThis->m_pIfBuf->Send, pvFrame, cbFrame); - if (RT_FAILURE(rc)) - { - SendReq.Hdr.u32Magic = SUPVMMR0REQHDR_MAGIC; - SendReq.Hdr.cbReq = sizeof(SendReq); - SendReq.pSession = pThis->m_pSession; - SendReq.hIf = pThis->m_hIf; - rc = SUPR3CallVMMR0Ex(NIL_RTR0PTR, NIL_VMCPUID, VMMR0_DO_INTNET_IF_SEND, 0, &SendReq.Hdr); - - rc = IntNetRingWriteFrame(&pThis->m_pIfBuf->Send, pvFrame, cbFrame); - - } - if (RT_SUCCESS(rc)) - { - SendReq.Hdr.u32Magic = SUPVMMR0REQHDR_MAGIC; - SendReq.Hdr.cbReq = sizeof(SendReq); - SendReq.pSession = pThis->m_pSession; - SendReq.hIf = pThis->m_hIf; - rc = SUPR3CallVMMR0Ex(NIL_RTR0PTR, NIL_VMCPUID, VMMR0_DO_INTNET_IF_SEND, 0, &SendReq.Hdr); - } - AssertRC((rc)); - if (RT_FAILURE(rc)) - Log2(("VBoxNetNAT: Failed to send packet; rc=%Rrc\n", rc)); - - if (!fUrg) - { - ASMAtomicDecU32(&g_pNAT->cPkt); - } - else { - if (ASMAtomicDecU32(&g_pNAT->cUrgPkt) == 0) - RTSemEventSignal(g_pNAT->m_EventSend); - } - slirp_ext_m_free(pThis->m_pNATState, m, (uint8_t *)pvFrame); - natNotifyNATThread(); - LogFlowFuncLeave(); -} - -static DECLCALLBACK(int) 
AsyncIoThread(RTTHREAD pThread, void *pvUser) -{ - VBoxNetNAT *pThis = (VBoxNetNAT *)pvUser; - int nFDs = -1; -#ifdef RT_OS_WINDOWS - HANDLE *pahEvents = slirp_get_events(pThis->m_pNATState); -#else /* RT_OS_WINDOWS */ - unsigned int cPollNegRet = 0; -#endif /* !RT_OS_WINDOWS */ - - LogFlow(("drvNATAsyncIoThread: pThis=%p\n", pThis)); - - /* - * Polling loop. - */ - for(;;) - { - /* - * To prevent concurrent execution of sending/receiving threads - */ -#ifndef RT_OS_WINDOWS - nFDs = slirp_get_nsock(pThis->m_pNATState); - /* allocation for all sockets + Management pipe */ - struct pollfd *polls = (struct pollfd *)RTMemAlloc((1 + nFDs) * sizeof(struct pollfd) + sizeof(uint32_t)); - if (polls == NULL) - return VERR_NO_MEMORY; - - /* don't pass the management pipe */ - slirp_select_fill(pThis->m_pNATState, &nFDs, &polls[1]); - unsigned int cMsTimeout = slirp_get_timeout_ms(pThis->m_pNATState); - - polls[0].fd = RTPipeToNative(pThis->m_hPipeRead); - /* POLLRDBAND usually doesn't used on Linux but seems used on Solaris */ - polls[0].events = POLLRDNORM|POLLPRI|POLLRDBAND; - polls[0].revents = 0; - - int cChangedFDs = poll(polls, nFDs + 1, cMsTimeout); - if (cChangedFDs < 0) - { - if (errno == EINTR) - { - Log2(("NAT: signal was caught while sleep on poll\n")); - /* No error, just process all outstanding requests but don't wait */ - cChangedFDs = 0; - } - else if (cPollNegRet++ > 128) - { - LogRel(("NAT:Poll returns (%s) suppressed %d\n", strerror(errno), cPollNegRet)); - cPollNegRet = 0; - } - } - - if (cChangedFDs >= 0) - { - slirp_select_poll(pThis->m_pNATState, &polls[1], nFDs); - if (polls[0].revents & (POLLRDNORM|POLLPRI|POLLRDBAND)) - { - /* drain the pipe - * - * Note! - * drvNATSend decoupled so we don't know how many times - * device's thread sends before we've entered multiplex, - * so to avoid false alarm drain pipe here to the very end - * - * @todo: Probably we should counter drvNATSend to count how - * deep pipe has been filed before drain. 
- * - */ - /** @todo XXX: Make it reading exactly we need to drain the - * pipe. */ - char ch; - size_t cbRead; - RTPipeRead(pThis->m_hPipeRead, &ch, 1, &cbRead); - } - } - /* process _all_ outstanding requests but don't wait */ - RTReqQueueProcess(pThis->m_hReqQueue, 0); - RTMemFree(polls); - -#else /* RT_OS_WINDOWS */ - nFDs = -1; - slirp_select_fill(pThis->m_pNATState, &nFDs); - DWORD dwEvent = WSAWaitForMultipleEvents(nFDs, pahEvents, FALSE, - slirp_get_timeout_ms(pThis->m_pNATState), - FALSE); - if ( (dwEvent < WSA_WAIT_EVENT_0 || dwEvent > WSA_WAIT_EVENT_0 + nFDs - 1) - && dwEvent != WSA_WAIT_TIMEOUT) - { - int error = WSAGetLastError(); - LogRel(("NAT: WSAWaitForMultipleEvents returned %d (error %d)\n", dwEvent, error)); - RTAssertReleasePanic(); - } - - if (dwEvent == WSA_WAIT_TIMEOUT) - { - /* only check for slow/fast timers */ - slirp_select_poll(pThis->m_pNATState, /* fTimeout=*/true, /*fIcmp=*/false); - continue; - } - - /* poll the sockets in any case */ - slirp_select_poll(pThis->m_pNATState, /* fTimeout=*/false, /* fIcmp=*/(dwEvent == WSA_WAIT_EVENT_0)); - /* process _all_ outstanding requests but don't wait */ - RTReqQueueProcess(pThis->m_hReqQueue, 0); -#endif /* RT_OS_WINDOWS */ - } - - return VINF_SUCCESS; -} - -static DECLCALLBACK(int) natSndThread(RTTHREAD pThread, void *pvUser) -{ - while (g_pNAT->fIsRunning) - RTReqQueueProcess(g_pNAT->m_hSendQueue, 0); - return VINF_SUCCESS; -} -static DECLCALLBACK(int) natUrgSndThread(RTTHREAD pThread, void *pvUser) -{ - while (g_pNAT->fIsRunning) - RTReqQueueProcess(g_pNAT->m_hUrgSendQueue, 0); - return VINF_SUCCESS; -} - -#ifndef VBOX_WITH_HARDENING - -int main(int argc, char **argv, char **envp) -{ - int rc = RTR3InitExe(argc, &argv, RTR3INIT_FLAGS_SUPLIB); - if (RT_FAILURE(rc)) - return RTMsgInitFailure(rc); - - return TrustedMain(argc, argv, envp); -} - -# if defined(RT_OS_WINDOWS) - -static LRESULT CALLBACK WindowProc(HWND hwnd, - UINT uMsg, - WPARAM wParam, - LPARAM lParam -) -{ - if(uMsg == 
WM_DESTROY) - { - PostQuitMessage(0); - return 0; - } - return DefWindowProc (hwnd, uMsg, wParam, lParam); -} - -static LPCSTR g_WndClassName = "VBoxNetNatClass"; - -static DWORD WINAPI MsgThreadProc(__in LPVOID lpParameter) -{ - HWND hwnd = 0; - HINSTANCE hInstance = (HINSTANCE)GetModuleHandle (NULL); - bool bExit = false; - - /* Register the Window Class. */ - WNDCLASS wc; - wc.style = 0; - wc.lpfnWndProc = WindowProc; - wc.cbClsExtra = 0; - wc.cbWndExtra = sizeof(void *); - wc.hInstance = hInstance; - wc.hIcon = NULL; - wc.hCursor = NULL; - wc.hbrBackground = (HBRUSH)(COLOR_BACKGROUND + 1); - wc.lpszMenuName = NULL; - wc.lpszClassName = g_WndClassName; - - ATOM atomWindowClass = RegisterClass(&wc); - - if (atomWindowClass != 0) - { - /* Create the window. */ - hwnd = CreateWindowEx (WS_EX_TOOLWINDOW | WS_EX_TRANSPARENT | WS_EX_TOPMOST, - g_WndClassName, g_WndClassName, - WS_POPUPWINDOW, - -200, -200, 100, 100, NULL, NULL, hInstance, NULL); - - if (hwnd) - { - SetWindowPos(hwnd, HWND_TOPMOST, -200, -200, 0, 0, - SWP_NOACTIVATE | SWP_HIDEWINDOW | SWP_NOCOPYBITS | SWP_NOREDRAW | SWP_NOSIZE); - - MSG msg; - while (GetMessage(&msg, NULL, 0, 0)) - { - TranslateMessage(&msg); - DispatchMessage(&msg); - } - - DestroyWindow (hwnd); - - bExit = true; - } - - UnregisterClass (g_WndClassName, hInstance); - } - - if(bExit) - { - /* no need any accuracy here, in anyway the DHCP server usually gets terminated with TerminateProcess */ - exit(0); - } - - return 0; -} - - - -/** (We don't want a console usually.) 
*/ -int WINAPI WinMain(HINSTANCE hInstance, HINSTANCE hPrevInstance, LPSTR lpCmdLine, int nCmdShow) -{ -#if 0 - NOREF(hInstance); NOREF(hPrevInstance); NOREF(lpCmdLine); NOREF(nCmdShow); - - HANDLE hThread = CreateThread( - NULL, /*__in_opt LPSECURITY_ATTRIBUTES lpThreadAttributes, */ - 0, /*__in SIZE_T dwStackSize, */ - MsgThreadProc, /*__in LPTHREAD_START_ROUTINE lpStartAddress,*/ - NULL, /*__in_opt LPVOID lpParameter,*/ - 0, /*__in DWORD dwCreationFlags,*/ - NULL /*__out_opt LPDWORD lpThreadId*/ - ); - - if(hThread != NULL) - CloseHandle(hThread); - -#endif - return main(__argc, __argv, environ); -} -# endif /* RT_OS_WINDOWS */ - -#endif /* !VBOX_WITH_HARDENING */ - diff --git a/src/VBox/NetworkServices/NAT/VBoxNetNATHardened.cpp b/src/VBox/NetworkServices/NAT/VBoxNetNATHardened.cpp index b0d74e73..f22376d2 100644 --- a/src/VBox/NetworkServices/NAT/VBoxNetNATHardened.cpp +++ b/src/VBox/NetworkServices/NAT/VBoxNetNATHardened.cpp @@ -4,7 +4,7 @@ */ /* - * Copyright (C) 2009 Oracle Corporation + * Copyright (C) 2009-2010 Oracle Corporation * * This file is part of VirtualBox Open Source Edition (OSE), as * available from http://www.virtualbox.org. 
This file is free software; @@ -17,8 +17,11 @@ #include <VBox/sup.h> +#ifndef SERVICE_NAME +# error "Please define SERVICE_NAME" +#endif int main(int argc, char **argv, char **envp) { - return SUPR3HardenedMain("VBoxNetNAT", 0 /* fFlags */, argc, argv, envp); + return SUPR3HardenedMain(SERVICE_NAME, 0 /* fFlags */, argc, argv, envp); } diff --git a/src/VBox/NetworkServices/NAT/dhcp6.h b/src/VBox/NetworkServices/NAT/dhcp6.h new file mode 100644 index 00000000..358911cd --- /dev/null +++ b/src/VBox/NetworkServices/NAT/dhcp6.h @@ -0,0 +1,32 @@ +/* -*- indent-tabs-mode: nil; -*- */ +#ifndef _DHCP6_H_ +#define _DHCP6_H_ + +/* UDP ports */ +#define DHCP6_CLIENT_PORT 546 +#define DHCP6_SERVER_PORT 547 + +/* Message types */ +#define DHCP6_REPLY 7 +#define DHCP6_INFORMATION_REQUEST 11 +#define DHCP6_RELAY_FORW 12 +#define DHCP6_RELAY_REPLY 13 + +/* DUID types */ +#define DHCP6_DUID_LLT 1 +#define DHCP6_DUID_EN 2 +#define DHCP6_DUID_LL 3 + +/* Hardware type for DUID-LLT and DUID-LL */ +#define ARES_HRD_ETHERNET 1 /* RFC 826*/ + +/* Options */ +#define DHCP6_OPTION_CLIENTID 1 +#define DHCP6_OPTION_SERVERID 2 +#define DHCP6_OPTION_ORO 6 +#define DHCP6_OPTION_ELAPSED_TIME 8 +#define DHCP6_OPTION_STATUS_CODE 13 +#define DHCP6_OPTION_DNS_SERVERS 23 /* RFC 3646 */ +#define DHCP6_OPTION_DOMAIN_LIST 24 /* RFC 3646 */ + +#endif /* _DHCP6_H_ */ diff --git a/src/VBox/NetworkServices/NAT/fwtcp.c b/src/VBox/NetworkServices/NAT/fwtcp.c new file mode 100644 index 00000000..fa0fde4a --- /dev/null +++ b/src/VBox/NetworkServices/NAT/fwtcp.c @@ -0,0 +1,315 @@ +/* -*- indent-tabs-mode: nil; -*- */ +#include "winutils.h" +#include "proxy.h" +#include "proxy_pollmgr.h" +#include "portfwd.h" +#include "pxtcp.h" + +#ifndef RT_OS_WINDOWS +#include <sys/types.h> +#include <sys/socket.h> +#include <arpa/inet.h> +#include <stdio.h> +#include <poll.h> + +#include <err.h> /* BSD'ism */ +#else +#include <stdio.h> +#include "winpoll.h" +#endif + +#include "lwip/opt.h" + +#include "lwip/sys.h" +#include 
"lwip/tcpip.h" + + +/** + */ +struct fwtcp { + /** + * Our poll manager handler. + */ + struct pollmgr_handler pmhdl; + + /** + * Forwarding specification. + */ + struct fwspec fwspec; + + /** + * Listening socket. + */ + SOCKET sock; + + /** + * Mailbox for new inbound connections. + * + * XXX: since we have single producer and single consumer we can + * use lockless ringbuf like for pxtcp. + */ + sys_mbox_t connmbox; + + struct tcpip_msg msg_connect; + struct tcpip_msg msg_delete; + + /** + * Linked list entry. + */ + struct fwtcp *next; +}; + + +static struct fwtcp *fwtcp_create(struct fwspec *); + +/* poll manager callback for fwtcp listening socket */ +static int fwtcp_pmgr_listen(struct pollmgr_handler *, SOCKET, int); + +/* lwip thread callbacks called via proxy_lwip_post() */ +static void fwtcp_pcb_connect(void *); +static void fwtcp_pcb_delete(void *); + + +/** + * Linked list of active fwtcp forwarders. + */ +struct fwtcp *fwtcp_list = NULL; + + +void +fwtcp_init(void) +{ + return; +} + + +void +fwtcp_add(struct fwspec *fwspec) +{ + struct fwtcp *fwtcp; + + fwtcp = fwtcp_create(fwspec); + if (fwtcp == NULL) { + DPRINTF0(("%s: failed to add rule for TCP ...\n", __func__)); + return; + } + + DPRINTF0(("%s\n", __func__)); + /* fwtcp_create has put fwtcp on the linked list */ +} + + +void +fwtcp_del(struct fwspec *fwspec) +{ + struct fwtcp *fwtcp; + struct fwtcp **pprev; + + for (pprev = &fwtcp_list; (fwtcp = *pprev) != NULL; pprev = &fwtcp->next) { + if (fwspec_equal(&fwtcp->fwspec, fwspec)) { + *pprev = fwtcp->next; + fwtcp->next = NULL; + break; + } + } + + if (fwtcp == NULL) { + DPRINTF0(("%s: not found\n", __func__)); + return; + } + + DPRINTF0(("%s\n", __func__)); + + pollmgr_del_slot(fwtcp->pmhdl.slot); + fwtcp->pmhdl.slot = -1; + + closesocket(fwtcp->sock); + fwtcp->sock = INVALID_SOCKET; + + /* let pending msg_connect be processed before we delete fwtcp */ + proxy_lwip_post(&fwtcp->msg_delete); +} + + +struct fwtcp * +fwtcp_create(struct fwspec 
*fwspec) +{ + struct fwtcp *fwtcp; + SOCKET lsock; + int status; + err_t error; + + lsock = proxy_bound_socket(fwspec->sdom, fwspec->stype, &fwspec->src.sa); + if (lsock == INVALID_SOCKET) { + perror("socket"); + return NULL; + } + + fwtcp = (struct fwtcp *)malloc(sizeof(*fwtcp)); + if (fwtcp == NULL) { + closesocket(lsock); + return NULL; + } + + fwtcp->pmhdl.callback = fwtcp_pmgr_listen; + fwtcp->pmhdl.data = (void *)fwtcp; + fwtcp->pmhdl.slot = -1; + + fwtcp->sock = lsock; + fwtcp->fwspec = *fwspec; /* struct copy */ + + error = sys_mbox_new(&fwtcp->connmbox, 16); + if (error != ERR_OK) { + closesocket(lsock); + free(fwtcp); + return (NULL); + } + +#define CALLBACK_MSG(MSG, FUNC) \ + do { \ + fwtcp->MSG.type = TCPIP_MSG_CALLBACK_STATIC; \ + fwtcp->MSG.sem = NULL; \ + fwtcp->MSG.msg.cb.function = FUNC; \ + fwtcp->MSG.msg.cb.ctx = (void *)fwtcp; \ + } while (0) + + CALLBACK_MSG(msg_connect, fwtcp_pcb_connect); + CALLBACK_MSG(msg_delete, fwtcp_pcb_delete); + +#undef CALLBACK_MSG + + status = pollmgr_add(&fwtcp->pmhdl, fwtcp->sock, POLLIN); + if (status < 0) { + sys_mbox_free(&fwtcp->connmbox); + closesocket(lsock); + free(fwtcp); + return NULL; + } + + fwtcp->next = fwtcp_list; + fwtcp_list = fwtcp; + + return fwtcp; +} + + +int +fwtcp_pmgr_listen(struct pollmgr_handler *handler, SOCKET fd, int revents) +{ + struct fwtcp *fwtcp; + struct sockaddr_storage ss; + socklen_t sslen; + void *peer_addr; + uint16_t peer_port; + struct pxtcp *pxtcp; + SOCKET newsock; + int status; + err_t error; + + fwtcp = (struct fwtcp *)handler->data; + pxtcp = NULL; + + LWIP_ASSERT1(fwtcp != NULL); + LWIP_ASSERT1(fd == fwtcp->sock); + LWIP_ASSERT1(revents == POLLIN); + LWIP_UNUSED_ARG(fd); + LWIP_UNUSED_ARG(revents); + + LWIP_ASSERT1(sys_mbox_valid(&fwtcp->connmbox)); + + sslen = sizeof(ss); + newsock = accept(fwtcp->sock, (struct sockaddr *)&ss, &sslen); + if (newsock == INVALID_SOCKET) { + return POLLIN; + } + + + if (ss.ss_family == PF_INET) { + struct sockaddr_in *peer4 = (struct 
sockaddr_in *)&ss; + peer_addr = &peer4->sin_addr; + peer_port = peer4->sin_port; + } + else { /* PF_INET6 */ + struct sockaddr_in6 *peer6 = (struct sockaddr_in6 *)&ss; + peer_addr = &peer6->sin6_addr; + peer_port = peer6->sin6_port; + } + peer_port = ntohs(peer_port); + +#if PLEASE_ABSTAIN_FROM_DPRINFING > 1 /* DPRINTF */ && !defined(RT_OS_WINDOWS) + { + char addrbuf[sizeof "ffff:ffff:ffff:ffff:ffff:ffff:255.255.255.255"]; + const char *addrstr; + + addrstr = inet_ntop(ss.ss_family, peer_addr, addrbuf, sizeof(addrbuf)); + DPRINTF(("<--- TCP %s%s%s:%d\n", + ss.ss_family == AF_INET6 ? "[" : "", + addrstr, + ss.ss_family == AF_INET6 ? "]" : "", + peer_port)); + } +#endif /* DPRINTF */ + + + pxtcp = pxtcp_create_forwarded(newsock); + if (pxtcp == NULL) { + proxy_reset_socket(newsock); + return POLLIN; + } + + status = pxtcp_pmgr_add(pxtcp); + if (status < 0) { + pxtcp_cancel_forwarded(pxtcp); + return POLLIN; + } + + error = sys_mbox_trypost(&fwtcp->connmbox, (void *)pxtcp); + if (error != ERR_OK) { + pxtcp_pmgr_del(pxtcp); + pxtcp_cancel_forwarded(pxtcp); + return POLLIN; + } + + proxy_lwip_post(&fwtcp->msg_connect); + return POLLIN; +} + + +void +fwtcp_pcb_connect(void *arg) +{ + struct fwtcp *fwtcp = (struct fwtcp *)arg; + struct pxtcp *pxtcp; + u32_t timo; + + if (!sys_mbox_valid(&fwtcp->connmbox)) { + return; + } + + pxtcp = NULL; + timo = sys_mbox_tryfetch(&fwtcp->connmbox, (void **)&pxtcp); + if (timo == SYS_MBOX_EMPTY) { + return; + } + + LWIP_ASSERT1(pxtcp != NULL); + + /* hand off to pxtcp */ + pxtcp_pcb_connect(pxtcp, &fwtcp->fwspec); +} + + +static void +fwtcp_pcb_delete(void *arg) +{ + struct fwtcp *fwtcp = (struct fwtcp *)arg; + void *data; + u32_t timo; + + timo = sys_mbox_tryfetch(&fwtcp->connmbox, &data); + LWIP_ASSERT1(timo == SYS_MBOX_EMPTY); + LWIP_UNUSED_ARG(timo); /* only in assert */ + + sys_mbox_free(&fwtcp->connmbox); + free(fwtcp); +} diff --git a/src/VBox/NetworkServices/NAT/fwudp.c b/src/VBox/NetworkServices/NAT/fwudp.c new file mode 100644 
index 00000000..1cc256f0 --- /dev/null +++ b/src/VBox/NetworkServices/NAT/fwudp.c @@ -0,0 +1,523 @@ +/* -*- indent-tabs-mode: nil; -*- */ +#include "winutils.h" +#include "proxy.h" +#include "proxy_pollmgr.h" +#include "portfwd.h" +#include "pxremap.h" + +#ifndef RT_OS_WINDOWS +#include <sys/types.h> +#include <sys/socket.h> +#include <stdio.h> +#include <string.h> +#include <poll.h> + +#include <err.h> /* BSD'ism */ +#else +#include <stdio.h> +#include <string.h> +#include "winpoll.h" +#endif + +#include "lwip/opt.h" +#include "lwip/memp.h" /* XXX: for bulk delete of pcbs */ + +#include "lwip/sys.h" +#include "lwip/tcpip.h" +#include "lwip/udp.h" + +struct fwudp_dgram { + struct pbuf *p; + ipX_addr_t src_addr; + u16_t src_port; +}; + +/** + * UDP port-forwarding. + * + * Unlike pxudp that uses 1:1 mapping between pcb and socket, for + * port-forwarded UDP the setup is bit more elaborated. + * + * For fwtcp things are simple since incoming TCP connection get a new + * socket that we just hand off to pxtcp. Thus fwtcp only handles + * connection initiation. + * + * For fwudp all proxied UDP conversations share the same socket, so + * single fwudp multiplexes to several UDP pcbs. + * + * XXX: TODO: Currently pcbs point back directly to fwudp. It might + * make sense to introduce a per-pcb structure that points to fwudp + * and carries additional information, like pre-mapped peer address. + */ +struct fwudp { + /** + * Our poll manager handler. + */ + struct pollmgr_handler pmhdl; + + /** + * Forwarding specification. + */ + struct fwspec fwspec; + + /** + * XXX: lwip-format copy of destination + */ + ipX_addr_t dst_addr; + u16_t dst_port; + + /** + * Listening socket. + */ + SOCKET sock; + + /** + * Ring-buffer for inbound datagrams. 
+ */ + struct { + struct fwudp_dgram *buf; + size_t bufsize; + volatile size_t vacant; + volatile size_t unsent; + } inbuf; + + struct tcpip_msg msg_send; + struct tcpip_msg msg_delete; + + struct fwudp *next; +}; + + +struct fwudp *fwudp_create(struct fwspec *); + +/* poll manager callback for fwudp socket */ +static int fwudp_pmgr_pump(struct pollmgr_handler *, SOCKET, int); + +/* lwip thread callbacks called via proxy_lwip_post() */ +static void fwudp_pcb_send(void *); +static void fwudp_pcb_delete(void *); + +static void fwudp_pcb_recv(void *, struct udp_pcb *, struct pbuf *, ip_addr_t *, u16_t); +static void fwudp_pcb_forward_outbound(struct fwudp *, struct udp_pcb *, struct pbuf *); + + +/** + * Linked list of active fwtcp forwarders. + */ +struct fwudp *fwudp_list = NULL; + + +void +fwudp_init(void) +{ + return; +} + + +void +fwudp_add(struct fwspec *fwspec) +{ + struct fwudp *fwudp; + + fwudp = fwudp_create(fwspec); + if (fwudp == NULL) { + DPRINTF0(("%s: failed to add rule for UDP ...\n", __func__)); + return; + } + + DPRINTF0(("%s\n", __func__)); + /* fwudp_create has put fwudp on the linked list */ +} + + +void +fwudp_del(struct fwspec *fwspec) +{ + struct fwudp *fwudp; + struct fwudp **pprev; + + for (pprev = &fwudp_list; (fwudp = *pprev) != NULL; pprev = &fwudp->next) { + if (fwspec_equal(&fwudp->fwspec, fwspec)) { + *pprev = fwudp->next; + fwudp->next = NULL; + break; + } + } + + if (fwudp == NULL) { + DPRINTF0(("%s: not found\n", __func__)); + return; + } + + DPRINTF0(("%s\n", __func__)); + + pollmgr_del_slot(fwudp->pmhdl.slot); + fwudp->pmhdl.slot = -1; + + /* let pending msg_send be processed before we delete fwudp */ + proxy_lwip_post(&fwudp->msg_delete); +} + + +struct fwudp * +fwudp_create(struct fwspec *fwspec) +{ + struct fwudp *fwudp; + SOCKET sock; + int status; + + sock = proxy_bound_socket(fwspec->sdom, fwspec->stype, &fwspec->src.sa); + if (sock == INVALID_SOCKET) { + perror("socket"); + return NULL; + } + + fwudp = (struct fwudp 
*)malloc(sizeof(*fwudp)); + if (fwudp == NULL) { + closesocket(sock); + return NULL; + } + + fwudp->pmhdl.callback = fwudp_pmgr_pump; + fwudp->pmhdl.data = (void *)fwudp; + fwudp->pmhdl.slot = -1; + + fwudp->sock = sock; + fwudp->fwspec = *fwspec; /* struct copy */ + + /* XXX */ + if (fwspec->sdom == PF_INET) { + struct sockaddr_in *dst4 = &fwspec->dst.sin; + memcpy(&fwudp->dst_addr.ip4, &dst4->sin_addr, sizeof(ip_addr_t)); + fwudp->dst_port = htons(dst4->sin_port); + } + else { /* PF_INET6 */ + struct sockaddr_in6 *dst6 = &fwspec->dst.sin6; + memcpy(&fwudp->dst_addr.ip6, &dst6->sin6_addr, sizeof(ip6_addr_t)); + fwudp->dst_port = htons(dst6->sin6_port); + } + + fwudp->inbuf.bufsize = 256; /* elements */ + fwudp->inbuf.buf + = (struct fwudp_dgram *)calloc(fwudp->inbuf.bufsize, + sizeof(struct fwudp_dgram)); + if (fwudp->inbuf.buf == NULL) { + closesocket(sock); + free(fwudp); + return (NULL); + } + fwudp->inbuf.vacant = 0; + fwudp->inbuf.unsent = 0; + +#define CALLBACK_MSG(MSG, FUNC) \ + do { \ + fwudp->MSG.type = TCPIP_MSG_CALLBACK_STATIC; \ + fwudp->MSG.sem = NULL; \ + fwudp->MSG.msg.cb.function = FUNC; \ + fwudp->MSG.msg.cb.ctx = (void *)fwudp; \ + } while (0) + + CALLBACK_MSG(msg_send, fwudp_pcb_send); + CALLBACK_MSG(msg_delete, fwudp_pcb_delete); + +#undef CALLBACK_MSG + + status = pollmgr_add(&fwudp->pmhdl, fwudp->sock, POLLIN); + if (status < 0) { + closesocket(sock); + free(fwudp->inbuf.buf); + free(fwudp); + return NULL; + } + + fwudp->next = fwudp_list; + fwudp_list = fwudp; + + return fwudp; +} + + +/** + * Poll manager callaback for fwudp::sock + */ +int +fwudp_pmgr_pump(struct pollmgr_handler *handler, SOCKET fd, int revents) +{ + struct fwudp *fwudp; + struct sockaddr_storage ss; + socklen_t sslen = sizeof(ss); + size_t beg, lim; + struct fwudp_dgram *dgram; + struct pbuf *p; + ssize_t nread; + int status; + err_t error; + + fwudp = (struct fwudp *)handler->data; + + LWIP_ASSERT1(fwudp != NULL); + LWIP_ASSERT1(fd == fwudp->sock); + LWIP_ASSERT1(revents 
== POLLIN); + LWIP_UNUSED_ARG(fd); + LWIP_UNUSED_ARG(revents); + + nread = recvfrom(fwudp->sock, pollmgr_udpbuf, sizeof(pollmgr_udpbuf), 0, + (struct sockaddr *)&ss, &sslen); + if (nread < 0) { + perror(__func__); + return POLLIN; + } + + /* Check that ring buffer is not full */ + lim = fwudp->inbuf.unsent; + if (lim == 0) { + lim = fwudp->inbuf.bufsize - 1; /* guard slot at the end */ + } + else { + --lim; + } + + beg = fwudp->inbuf.vacant; + if (beg == lim) { /* no vacant slot */ + return POLLIN; + } + + + dgram = &fwudp->inbuf.buf[beg]; + + + status = fwany_ipX_addr_set_src(&dgram->src_addr, (struct sockaddr *)&ss); + if (status == PXREMAP_FAILED) { + return POLLIN; + } + + if (ss.ss_family == AF_INET) { + const struct sockaddr_in *peer4 = (const struct sockaddr_in *)&ss; + dgram->src_port = htons(peer4->sin_port); + } + else { /* PF_INET6 */ + const struct sockaddr_in6 *peer6 = (const struct sockaddr_in6 *)&ss; + dgram->src_port = htons(peer6->sin6_port); + } + + p = pbuf_alloc(PBUF_RAW, nread, PBUF_RAM); + if (p == NULL) { + DPRINTF(("%s: pbuf_alloc(%d) failed\n", __func__, (int)nread)); + return POLLIN; + } + + error = pbuf_take(p, pollmgr_udpbuf, nread); + if (error != ERR_OK) { + DPRINTF(("%s: pbuf_take(%d) failed\n", __func__, (int)nread)); + pbuf_free(p); + return POLLIN; + } + + dgram->p = p; + + ++beg; + if (beg == fwudp->inbuf.bufsize) { + beg = 0; + } + fwudp->inbuf.vacant = beg; + + proxy_lwip_post(&fwudp->msg_send); + + return POLLIN; +} + + +/** + * Lwip thread callback invoked via fwudp::msg_send + */ +void +fwudp_pcb_send(void *arg) +{ + struct fwudp *fwudp = (struct fwudp *)arg; + struct fwudp_dgram dgram; + struct udp_pcb *pcb; + struct udp_pcb **pprev; + int isv6; + size_t idx; + + idx = fwudp->inbuf.unsent; + + if (idx == fwudp->inbuf.vacant) { + /* empty buffer - shouldn't happen! 
*/ + DPRINTF(("%s: ring buffer empty!\n", __func__)); + return; + } + + dgram = fwudp->inbuf.buf[idx]; /* struct copy */ +#if 1 /* valgrind hint */ + fwudp->inbuf.buf[idx].p = NULL; +#endif + if (++idx == fwudp->inbuf.bufsize) { + idx = 0; + } + fwudp->inbuf.unsent = idx; + + /* XXX: this is *STUPID* */ + isv6 = (fwudp->fwspec.sdom == PF_INET6); + pprev = &udp_proxy_pcbs; + for (pcb = udp_proxy_pcbs; pcb != NULL; pcb = pcb->next) { + if (PCB_ISIPV6(pcb) == isv6 + && pcb->remote_port == fwudp->dst_port + && ipX_addr_cmp(isv6, &fwudp->dst_addr, &pcb->remote_ip) + && pcb->local_port == dgram.src_port + && ipX_addr_cmp(isv6, &dgram.src_addr, &pcb->local_ip)) + { + break; + } + else { + pprev = &pcb->next; + } + } + + if (pcb != NULL) { + *pprev = pcb->next; + pcb->next = udp_proxy_pcbs; + udp_proxy_pcbs = pcb; + + /* + * XXX: check that its ours and not accidentally created by + * outbound traffic. + * + * ???: Otherwise? Expire it and set pcb = NULL; to create a + * new one below? + */ + } + + if (pcb == NULL) { + pcb = udp_new(); + if (pcb == NULL) { + goto out; + } + + ip_set_v6(pcb, isv6); + + /* equivalent of udp_bind */ + ipX_addr_set(isv6, &pcb->local_ip, &dgram.src_addr); + pcb->local_port = dgram.src_port; + + /* equivalent to udp_connect */ + ipX_addr_set(isv6, &pcb->remote_ip, &fwudp->dst_addr); + pcb->remote_port = fwudp->dst_port; + pcb->flags |= UDP_FLAGS_CONNECTED; + + udp_recv(pcb, fwudp_pcb_recv, fwudp); + + pcb->next = udp_proxy_pcbs; + udp_proxy_pcbs = pcb; + udp_proxy_timer_needed(); + } + + udp_send(pcb, dgram.p); + + out: + pbuf_free(dgram.p); +} + + +/** + * udp_recv() callback. 
+ */ +void +fwudp_pcb_recv(void *arg, struct udp_pcb *pcb, struct pbuf *p, + ip_addr_t *addr, u16_t port) +{ + struct fwudp *fwudp = (struct fwudp *)arg; + + LWIP_UNUSED_ARG(addr); + LWIP_UNUSED_ARG(port); + + LWIP_ASSERT1(fwudp != NULL); + + if (p == NULL) { + DPRINTF(("%s: pcb %p (fwudp %p); sock %d: expired\n", + __func__, (void *)pcb, (void *)fwudp, fwudp->sock)); + /* NB: fwudp is "global" and not deleted */ + /* XXX: TODO: delete local reference when we will keep one */ + udp_remove(pcb); + return; + } + else { + fwudp_pcb_forward_outbound(fwudp, pcb, p); + } +} + + +/* + * XXX: This is pxudp_pcb_forward_outbound modulo: + * - s/pxudp/fwudp/g + * - addr/port (unused in either) dropped + * - destination is specified since host socket is not connected + */ +static void +fwudp_pcb_forward_outbound(struct fwudp *fwudp, struct udp_pcb *pcb, + struct pbuf *p) +{ + union { + struct sockaddr_in sin; + struct sockaddr_in6 sin6; + } peer; + socklen_t namelen; + + memset(&peer, 0, sizeof(peer)); /* XXX: shut up valgrind */ + + if (fwudp->fwspec.sdom == PF_INET) { + peer.sin.sin_family = AF_INET; +#if HAVE_SA_LEN + peer.sin.sin_len = +#endif + namelen = sizeof(peer.sin); + pxremap_outbound_ip4((ip_addr_t *)&peer.sin.sin_addr, &pcb->local_ip.ip4); + peer.sin.sin_port = htons(pcb->local_port); + } + else { + peer.sin6.sin6_family = AF_INET6; +#if HAVE_SA_LEN + peer.sin6.sin6_len = +#endif + namelen = sizeof(peer.sin6); + + pxremap_outbound_ip6((ip6_addr_t *)&peer.sin6.sin6_addr, &pcb->local_ip.ip6); + peer.sin6.sin6_port = htons(pcb->local_port); + } + + proxy_sendto(fwudp->sock, p, &peer, namelen); + pbuf_free(p); +} + + +/** + * Lwip thread callback invoked via fwudp::msg_delete + */ +static void +fwudp_pcb_delete(void *arg) +{ + struct fwudp *fwudp = (struct fwudp *)arg; + struct udp_pcb *pcb; + struct udp_pcb **pprev; + + LWIP_ASSERT1(fwudp->inbuf.unsent == fwudp->inbuf.vacant); + + pprev = &udp_proxy_pcbs; + pcb = udp_proxy_pcbs; + while (pcb != NULL) { + if 
(pcb->recv_arg != fwudp) { + pprev = &pcb->next; + pcb = pcb->next; + } + else { + struct udp_pcb *dead = pcb; + pcb = pcb->next; + *pprev = pcb; + memp_free(MEMP_UDP_PCB, dead); + } + } + + closesocket(fwudp->sock); + free(fwudp->inbuf.buf); + free(fwudp); +} diff --git a/src/VBox/NetworkServices/NAT/lwipopts.h b/src/VBox/NetworkServices/NAT/lwipopts.h new file mode 100644 index 00000000..2ac26457 --- /dev/null +++ b/src/VBox/NetworkServices/NAT/lwipopts.h @@ -0,0 +1,179 @@ +#ifndef _VBOX_NETNAT_LWIP_OPTS_H_ +#define _VBOX_NETNAT_LWIP_OPTS_H_ + +#include <iprt/mem.h> +#include <iprt/alloca.h> /* This may include malloc.h (msc), which is something that has + * to be done before redefining any of the functions therein. */ +#include <iprt/rand.h> /* see LWIP_RAND() definition */ + +/* lwip/sockets.h assumes that if FD_SET is defined (in case of Innotek GCC + * its definition is dragged through iprt/types.h) then struct timeval is + * defined as well, but it's not the case. So include it manually. */ +#ifdef RT_OS_OS2 +# include <sys/time.h> +#endif + +/** Make lwIP use the libc malloc, or more precisely (see below) the IPRT + * memory allocation functions. */ +#define MEM_LIBC_MALLOC 1 + +/** Set proper memory alignment. */ +#if HC_ARCH_BITS == 64 +# define MEM_ALIGNMENT 8 +#else +#define MEM_ALIGNMENT 4 +#endif + +/* Padding before Ethernet header to make IP header aligned */ +#define ETH_PAD_SIZE 2 + +/* IP */ +#define IP_REASSEMBLY 1 +#define IP_REASS_MAX_PBUFS 128 + + + +/** Increase maximum TCP window size. */ +#define TCP_WND 32768 + +/** Increase TCP maximum segment size. */ +#define TCP_MSS 1460 + +/** Enable queueing of out-of-order segments. */ +#define TCP_QUEUE_OOSEQ 1 + +/** TCP sender buffer space (bytes). */ +#define TCP_SND_BUF (32 * TCP_MSS) + +/* TCP sender buffer space (pbufs). This must be at least = 2 * + TCP_SND_BUF/TCP_MSS for things to work. */ +#define TCP_SND_QUEUELEN 64 + +/* MEMP_NUM_PBUF: the number of memp struct pbufs. 
If the application + sends a lot of data out of ROM (or other static memory), this + should be set high. + + NB: This is for PBUF_ROM and PBUF_REF pbufs only! + + Number of PBUF_POOL pbufs is controlled by PBUF_POOL_SIZE that, + somewhat confusingly, breaks MEMP_NUM_* pattern. + + PBUF_RAM pbufs are allocated with mem_malloc (with MEM_LIBC_MALLOC + set to 1 this is just system malloc), not memp_malloc. */ +#define MEMP_NUM_PBUF (1024 * 4) + + +/* MEMP_NUM_MLD6_GROUP: Maximum number of IPv6 multicast groups that + can be joined. + + We need to be able to join solicited node multicast for each + address (potentially different) and two groups for DHCP6. All + routers multicast is hardcoded in ip6.c and does not require + explicit joining. Provide also for a few extra groups just in + case. */ +#define MEMP_NUM_MLD6_GROUP (LWIP_IPV6_NUM_ADDRESSES + /* dhcp6 */ 2 + /* extra */ 8) + + +/* MEMP_NUM_TCP_SEG: the number of simultaneously queued TCP + segments. */ +#define MEMP_NUM_TCP_SEG (MEMP_NUM_TCP_PCB * TCP_SND_QUEUELEN / 2) + +/* MEMP_NUM_TCP_PCB: the number of simulatenously active TCP + connections. */ +#define MEMP_NUM_TCP_PCB 128 + +/* MEMP_NUM_TCPIP_MSG_*: the number of struct tcpip_msg, which is used + for sequential API communication and incoming packets. Used in + src/api/tcpip.c. */ +#define MEMP_NUM_TCPIP_MSG_API 128 +#define MEMP_NUM_TCPIP_MSG_INPKT 1024 + +/* MEMP_NUM_UDP_PCB: the number of UDP protocol control blocks. One + per active UDP "connection". */ +#define MEMP_NUM_UDP_PCB 32 + +/* Pbuf options */ +/* PBUF_POOL_SIZE: the number of buffers in the pbuf pool. + This is only for PBUF_POOL pbufs, primarily used by netif drivers. + + This should have been named with the MEMP_NUM_ prefix (cf. + MEMP_NUM_PBUF for PBUF_ROM and PBUF_REF) as it controls the size of + yet another memp_malloc() pool. */ +#define PBUF_POOL_SIZE (1024 * 4) + +/* PBUF_POOL_BUFSIZE: the size of each pbuf in the pbuf pool. + Use default that is based on TCP_MSS and PBUF_LINK_HLEN. 
*/ +#undef PBUF_POOL_BUFSIZE + +/** Turn on support for lightweight critical region protection. Leaving this + * off uses synchronization code in pbuf.c which is totally polluted with + * races. All the other lwip source files would fall back to semaphore-based + * synchronization, but pbuf.c is just broken, leading to incorrect allocation + * and as a result to assertions due to buffers being double freed. */ +#define SYS_LIGHTWEIGHT_PROT 1 + +/** Attempt to get rid of htons etc. macro issues. */ +#undef LWIP_PREFIX_BYTEORDER_FUNCS + +#define LWIP_TCPIP_CORE_LOCKING_INPUT 0 +#define LWIP_TCPIP_CORE_LOCKING 0 +#define LWIP_TCP 1 +#define LWIP_SOCKET 0 +#define LWIP_ARP 1 +#define ARP_PROXY 1 +#define LWIP_ETHERNET 1 +#define LWIP_COMPAT_SOCKETS 0 +#define LWIP_COMPAT_MUTEX 1 + +#define LWIP_IPV6 1 +#define LWIP_IPV6_FORWARD 1 +#define LWIP_ND6_PROXY 1 + +#define LWIP_ND6_ALLOW_RA_UPDATES (!LWIP_IPV6_FORWARD) +#define LWIP_IPV6_SEND_ROUTER_SOLICIT (!LWIP_IPV6_FORWARD) +/* IPv6 autoconfig we don't need in proxy, but it required for very seldom cases + * iSCSI over intnet with IPv6 + */ +#define LWIP_IPV6_AUTOCONFIG 1 +#if LWIP_IPV6_FORWARD /* otherwise use the default from lwip/opt.h */ +#define LWIP_IPV6_DUP_DETECT_ATTEMPTS 0 +#endif + +#define LWIP_IPV6_FRAG 1 + +/** + * aka Slirp mode. + */ +#define LWIP_CONNECTION_PROXY 1 +#define IP_FORWARD 1 + +/* MEMP_NUM_SYS_TIMEOUT: the number of simultaneously active + timeouts. */ +#define MEMP_NUM_SYS_TIMEOUT 16 + + +/* this is required for IPv6 and IGMP needs */ +#define LWIP_RAND() RTRandU32() + +/* Debugging stuff. */ +#ifdef DEBUG +# define LWIP_DEBUG +# include "lwip-log.h" + +# define LWIP_PROXY_DEBUG LWIP_DBG_OFF +#endif /* DEBUG */ + +/* printf formatter definitions */ +#define U16_F "hu" +#define S16_F "hd" +#define X16_F "hx" +#define U32_F "lu" +#define S32_F "ld" +#define X32_F "lx" + +/* Redirect libc memory alloc functions to IPRT. 
*/ +#define malloc(x) RTMemAlloc(x) +#define realloc(x,y) RTMemRealloc((x), (y)) +#define free(x) RTMemFree(x) + +#endif /* _VBOX_NETNAT_LWIP_OPTS_H_ */ diff --git a/src/VBox/NetworkServices/NAT/portfwd.c b/src/VBox/NetworkServices/NAT/portfwd.c new file mode 100644 index 00000000..d4593834 --- /dev/null +++ b/src/VBox/NetworkServices/NAT/portfwd.c @@ -0,0 +1,244 @@ +/* -*- indent-tabs-mode: nil; -*- */ +#include "winutils.h" +#include "portfwd.h" + +#ifndef RT_OS_WINDOWS +#include <arpa/inet.h> +#include <netdb.h> +#include <poll.h> +#else +# include "winpoll.h" +#endif +#include <stdio.h> +#include <string.h> + +#include "proxy.h" +#include "proxy_pollmgr.h" +#include "pxremap.h" + +#include "lwip/netif.h" + + +struct portfwd_msg { + struct fwspec *fwspec; + int add; +}; + + +static int portfwd_chan_send(struct portfwd_msg *); +static int portfwd_rule_add_del(struct fwspec *, int); +static int portfwd_pmgr_chan(struct pollmgr_handler *, SOCKET, int); + + +static struct pollmgr_handler portfwd_pmgr_chan_hdl; + + +void +portfwd_init(void) +{ + portfwd_pmgr_chan_hdl.callback = portfwd_pmgr_chan; + portfwd_pmgr_chan_hdl.data = NULL; + portfwd_pmgr_chan_hdl.slot = -1; + pollmgr_add_chan(POLLMGR_CHAN_PORTFWD, &portfwd_pmgr_chan_hdl); + + /* add preconfigured forwarders */ + fwtcp_init(); + fwudp_init(); +} + + +static int +portfwd_chan_send(struct portfwd_msg *msg) +{ + ssize_t nsent; + + nsent = pollmgr_chan_send(POLLMGR_CHAN_PORTFWD, &msg, sizeof(msg)); + if (nsent < 0) { + free(msg); + return -1; + } + + return 0; +} + + +static int +portfwd_rule_add_del(struct fwspec *fwspec, int add) +{ + struct portfwd_msg *msg; + + msg = (struct portfwd_msg *)malloc(sizeof(*msg)); + if (msg == NULL) { + return -1; + } + + msg->fwspec = fwspec; + msg->add = add; + + return portfwd_chan_send(msg); +} + + +int +portfwd_rule_add(struct fwspec *fwspec) +{ + return portfwd_rule_add_del(fwspec, 1); +} + + +int +portfwd_rule_del(struct fwspec *fwspec) +{ + return 
portfwd_rule_add_del(fwspec, 0); +} + + +/** + * POLLMGR_CHAN_PORTFWD handler. + */ +static int +portfwd_pmgr_chan(struct pollmgr_handler *handler, SOCKET fd, int revents) +{ + void *ptr = pollmgr_chan_recv_ptr(handler, fd, revents); + struct portfwd_msg *msg = (struct portfwd_msg *)ptr; + + if (msg->fwspec->stype == SOCK_STREAM) { + if (msg->add) { + fwtcp_add(msg->fwspec); + } + else { + fwtcp_del(msg->fwspec); + } + } + else { /* SOCK_DGRAM */ + if (msg->add) { + fwudp_add(msg->fwspec); + } + else { + fwudp_del(msg->fwspec); + } + } + + free(msg->fwspec); + free(msg); + + return POLLIN; +} + + +int +fwspec_set(struct fwspec *fwspec, int sdom, int stype, + const char *src_addr_str, uint16_t src_port, + const char *dst_addr_str, uint16_t dst_port) +{ + struct addrinfo hints; + struct addrinfo *ai; + int status; + + LWIP_ASSERT1(sdom == PF_INET || sdom == PF_INET6); + LWIP_ASSERT1(stype == SOCK_STREAM || stype == SOCK_DGRAM); + + fwspec->sdom = sdom; + fwspec->stype = stype; + + memset(&hints, 0, sizeof(hints)); + hints.ai_family = (sdom == PF_INET) ? 
AF_INET : AF_INET6; + hints.ai_socktype = stype; + hints.ai_flags = AI_NUMERICHOST; + + status = getaddrinfo(src_addr_str, NULL, &hints, &ai); + if (status != 0) { + LogRel(("\"%s\": %s\n", src_addr_str, gai_strerror(status))); + return -1; + } + LWIP_ASSERT1(ai != NULL); + LWIP_ASSERT1(ai->ai_addrlen <= sizeof(fwspec->src)); + memcpy(&fwspec->src, ai->ai_addr, ai->ai_addrlen); + freeaddrinfo(ai); + ai = NULL; + + status = getaddrinfo(dst_addr_str, NULL, &hints, &ai); + if (status != 0) { + LogRel(("\"%s\": %s\n", dst_addr_str, gai_strerror(status))); + return -1; + } + LWIP_ASSERT1(ai != NULL); + LWIP_ASSERT1(ai->ai_addrlen <= sizeof(fwspec->dst)); + memcpy(&fwspec->dst, ai->ai_addr, ai->ai_addrlen); + freeaddrinfo(ai); + ai = NULL; + + if (sdom == PF_INET) { + fwspec->src.sin.sin_port = htons(src_port); + fwspec->dst.sin.sin_port = htons(dst_port); + } + else { /* PF_INET6 */ + fwspec->src.sin6.sin6_port = htons(src_port); + fwspec->dst.sin6.sin6_port = htons(dst_port); + } + + return 0; +} + + +int +fwspec_equal(struct fwspec *a, struct fwspec *b) +{ + LWIP_ASSERT1(a != NULL); + LWIP_ASSERT1(b != NULL); + + if (a->sdom != b->sdom || a->stype != b->stype) { + return 0; + } + + if (a->sdom == PF_INET) { + return a->src.sin.sin_port == b->src.sin.sin_port + && a->dst.sin.sin_port == b->dst.sin.sin_port + && a->src.sin.sin_addr.s_addr == b->src.sin.sin_addr.s_addr + && a->dst.sin.sin_addr.s_addr == b->dst.sin.sin_addr.s_addr; + } + else { /* PF_INET6 */ + return a->src.sin6.sin6_port == b->src.sin6.sin6_port + && a->dst.sin6.sin6_port == b->dst.sin6.sin6_port + && IN6_ARE_ADDR_EQUAL(&a->src.sin6.sin6_addr, &b->src.sin6.sin6_addr) + && IN6_ARE_ADDR_EQUAL(&a->dst.sin6.sin6_addr, &b->dst.sin6.sin6_addr); + } +} + + +/** + * Set fwdsrc to the IP address of the peer. + * + * For port-forwarded connections originating from hosts loopback the + * source address is set to the address of one of lwIP interfaces. 
+ * + * Currently we only have one interface so there's not much logic + * here. In the future we might need to additionally consult fwspec + * and routing table to determine which netif is used for connections + * to the specified guest. + */ +int +fwany_ipX_addr_set_src(ipX_addr_t *fwdsrc, const struct sockaddr *peer) +{ + int mapping; + + if (peer->sa_family == AF_INET) { + const struct sockaddr_in *peer4 = (const struct sockaddr_in *)peer; + ip_addr_t peerip4; + + peerip4.addr = peer4->sin_addr.s_addr; + mapping = pxremap_inbound_ip4(&fwdsrc->ip4, &peerip4); + } + else if (peer->sa_family == AF_INET6) { + const struct sockaddr_in6 *peer6 = (const struct sockaddr_in6 *)peer; + ip6_addr_t peerip6; + + memcpy(&peerip6, &peer6->sin6_addr, sizeof(ip6_addr_t)); + mapping = pxremap_inbound_ip6(&fwdsrc->ip6, &peerip6); + } + else { + mapping = PXREMAP_FAILED; + } + + return mapping; +} diff --git a/src/VBox/NetworkServices/NAT/portfwd.h b/src/VBox/NetworkServices/NAT/portfwd.h new file mode 100644 index 00000000..2eb2d6da --- /dev/null +++ b/src/VBox/NetworkServices/NAT/portfwd.h @@ -0,0 +1,55 @@ +/* -*- indent-tabs-mode: nil; -*- */ +#ifndef _portfwd_h_ +#define _portfwd_h_ + +#ifndef RT_OS_WINDOWS +#include <sys/types.h> +#include <sys/socket.h> +#include <netinet/in.h> +#endif + +#include "lwip/ip_addr.h" + + +struct fwspec { + int sdom; /* PF_INET, PF_INET6 */ + int stype; /* SOCK_STREAM, SOCK_DGRAM */ + + /* listen on */ + union { + struct sockaddr sa; + struct sockaddr_in sin; /* sdom == PF_INET */ + struct sockaddr_in6 sin6; /* sdom == PF_INET6 */ + } src; + + /* forward to */ + union { + struct sockaddr sa; + struct sockaddr_in sin; /* sdom == PF_INET */ + struct sockaddr_in6 sin6; /* sdom == PF_INET6 */ + } dst; +}; + + +void portfwd_init(void); +int portfwd_rule_add(struct fwspec *); +int portfwd_rule_del(struct fwspec *); + + +int fwspec_set(struct fwspec *, int, int, + const char *, uint16_t, + const char *, uint16_t); + +int fwspec_equal(struct fwspec *, 
struct fwspec *); + +void fwtcp_init(void); +void fwudp_init(void); + +void fwtcp_add(struct fwspec *); +void fwtcp_del(struct fwspec *); +void fwudp_add(struct fwspec *); +void fwudp_del(struct fwspec *); + +int fwany_ipX_addr_set_src(ipX_addr_t *, const struct sockaddr *); + +#endif /* _portfwd_h_ */ diff --git a/src/VBox/NetworkServices/NAT/proxy.c b/src/VBox/NetworkServices/NAT/proxy.c new file mode 100644 index 00000000..537ae851 --- /dev/null +++ b/src/VBox/NetworkServices/NAT/proxy.c @@ -0,0 +1,513 @@ +/* -*- indent-tabs-mode: nil; -*- */ +#include "winutils.h" + +#include "proxy.h" +#include "proxy_pollmgr.h" +#include "portfwd.h" + +#include "lwip/opt.h" + +#include "lwip/sys.h" +#include "lwip/tcpip.h" + +#ifndef RT_OS_WINDOWS +#include <sys/poll.h> +#include <sys/socket.h> +#include <netinet/in.h> +#include <arpa/inet.h> +#include <fcntl.h> +#include <stdio.h> +#include <iprt/string.h> +#include <unistd.h> +#include <err.h> +#else +# include <iprt/string.h> +#endif + +#if defined(SOCK_NONBLOCK) && defined(RT_OS_NETBSD) /* XXX: PR kern/47569 */ +# undef SOCK_NONBLOCK +#endif + +#ifndef __arraycount +# define __arraycount(a) (sizeof(a)/sizeof(a[0])) +#endif + +static SOCKET proxy_create_socket(int, int); + +volatile struct proxy_options *g_proxy_options; +static sys_thread_t pollmgr_tid; + +/* XXX: for mapping loopbacks to addresses in our network (ip4) */ +struct netif *g_proxy_netif; +/* + * Called on the lwip thread (aka tcpip thread) from tcpip_init() via + * its "tcpip_init_done" callback. Raw API is ok to use here + * (e.g. rtadvd), but netconn API is not. + */ +void +proxy_init(struct netif *proxy_netif, struct proxy_options *opts) +{ + int status; + + LWIP_ASSERT1(opts != NULL); + LWIP_UNUSED_ARG(proxy_netif); + + g_proxy_options = opts; + g_proxy_netif = proxy_netif; + +#if 1 + proxy_rtadvd_start(proxy_netif); +#endif + + /* + * XXX: We use stateless DHCPv6 only to report IPv6 address(es) of + * nameserver(s). 
Since we don't yet support IPv6 addresses in + * HostDnsService, there's no point in running DHCPv6. + */ +#if 0 + dhcp6ds_init(proxy_netif); +#endif + + if (opts->tftp_root != NULL) { + tftpd_init(proxy_netif, opts->tftp_root); + } + + status = pollmgr_init(); + if (status < 0) { + errx(EXIT_FAILURE, "failed to initialize poll manager"); + /* NOTREACHED */ + } + + pxtcp_init(); + pxudp_init(); + + portfwd_init(); + + pxdns_init(proxy_netif); + + pxping_init(proxy_netif, opts->icmpsock4, opts->icmpsock6); + + pollmgr_tid = sys_thread_new("pollmgr_thread", + pollmgr_thread, NULL, + DEFAULT_THREAD_STACKSIZE, + DEFAULT_THREAD_PRIO); + if (!pollmgr_tid) { + errx(EXIT_FAILURE, "failed to create poll manager thread"); + /* NOTREACHED */ + } +} + + +/** + * Send static callback message from poll manager thread to lwip + * thread, scheduling a function call in lwip thread context. + * + * XXX: Existing lwip api only provides non-blocking version for this. + * It may fail when lwip thread is not running (mbox invalid) or if + * post failed (mbox full). How to handle these? + */ +void +proxy_lwip_post(struct tcpip_msg *msg) +{ + struct tcpip_callback_msg *m; + err_t error; + + LWIP_ASSERT1(msg != NULL); + + /* + * lwip plays games with fake incomplete struct tag to enforce API + */ + m = (struct tcpip_callback_msg *)msg; + error = tcpip_callbackmsg(m); + + if (error == ERR_VAL) { + /* XXX: lwip thread is not running (mbox invalid) */ + LWIP_ASSERT1(error != ERR_VAL); + } + + LWIP_ASSERT1(error == ERR_OK); +} + + +/** + * Create a non-blocking socket. Disable SIGPIPE for TCP sockets if + * possible. On Linux it's not possible and should be disabled for + * each send(2) individually. 
+ */ +static SOCKET +proxy_create_socket(int sdom, int stype) +{ + SOCKET s; + int stype_and_flags; + int status; + + LWIP_UNUSED_ARG(status); /* depends on ifdefs */ + + + stype_and_flags = stype; + +#if defined(SOCK_NONBLOCK) + stype_and_flags |= SOCK_NONBLOCK; +#endif + + /* + * Disable SIGPIPE on disconnected socket. It might be easier to + * forgo it and just use MSG_NOSIGNAL on each send*(2), since we + * have to do it for Linux anyway, but Darwin does NOT have that + * flag (but has SO_NOSIGPIPE socket option). + */ +#if !defined(SOCK_NOSIGPIPE) && !defined(SO_NOSIGPIPE) && !defined(MSG_NOSIGNAL) +#if 0 /* XXX: Solaris has neither, the program should ignore SIGPIPE globally */ +#error Need a way to disable SIGPIPE on connection oriented sockets! +#endif +#endif + +#if defined(SOCK_NOSIGPIPE) + if (stype == SOCK_STREAM) { + stype_and_flags |= SOCK_NOSIGPIPE; + } +#endif + + s = socket(sdom, stype_and_flags, 0); + if (s == INVALID_SOCKET) { + perror("socket"); + return INVALID_SOCKET; + } + +#if !defined(SOCK_NONBLOCK) && !defined(RT_OS_WINDOWS) + { + int sflags; + + status = fcntl(s, F_GETFL, &sflags); + if (status < 0) { + perror("F_GETFL"); + closesocket(s); + return INVALID_SOCKET; + } + + status = fcntl(s, F_SETFL, sflags | O_NONBLOCK); + if (status < 0) { + perror("O_NONBLOCK"); + closesocket(s); + return INVALID_SOCKET; + } + } +#endif + +#if !defined(SOCK_NOSIGPIPE) && defined(SO_NOSIGPIPE) + if (stype == SOCK_STREAM) { + int on = 1; + const socklen_t onlen = sizeof(on); + + status = setsockopt(s, SOL_SOCKET, SO_NOSIGPIPE, &on, onlen); + if (status < 0) { + perror("SO_NOSIGPIPE"); + closesocket(s); + return INVALID_SOCKET; + } + } +#endif + +#if defined(RT_OS_WINDOWS) + { + u_long mode = 0; + status = ioctlsocket(s, FIONBIO, &mode); + if (status == SOCKET_ERROR) { + warn("ioctl error: %d\n", WSAGetLastError()); + return INVALID_SOCKET; + } + } +#endif + + return s; +} + + +/** + * Create a socket for outbound connection to dst_addr:dst_port. 
+ * + * The socket is non-blocking and TCP sockets has SIGPIPE disabled if + * possible. On Linux it's not possible and should be disabled for + * each send(2) individually. + */ +SOCKET +proxy_connected_socket(int sdom, int stype, + ipX_addr_t *dst_addr, u16_t dst_port) +{ + struct sockaddr_in6 dst_sin6; + struct sockaddr_in dst_sin; + struct sockaddr *pdst_sa; + socklen_t dst_sa_len; + void *pdst_addr; + const struct sockaddr *psrc_sa; + socklen_t src_sa_len; + int status; + SOCKET s; + + LWIP_ASSERT1(sdom == PF_INET || sdom == PF_INET6); + LWIP_ASSERT1(stype == SOCK_STREAM || stype == SOCK_DGRAM); + + if (sdom == PF_INET6) { + pdst_sa = (struct sockaddr *)&dst_sin6; + pdst_addr = (void *)&dst_sin6.sin6_addr; + + memset(&dst_sin6, 0, sizeof(dst_sin6)); +#if HAVE_SA_LEN + dst_sin6.sin6_len = +#endif + dst_sa_len = sizeof(dst_sin6); + dst_sin6.sin6_family = AF_INET6; + memcpy(&dst_sin6.sin6_addr, &dst_addr->ip6, sizeof(ip6_addr_t)); + dst_sin6.sin6_port = htons(dst_port); + } + else { /* sdom = PF_INET */ + pdst_sa = (struct sockaddr *)&dst_sin; + pdst_addr = (void *)&dst_sin.sin_addr; + + memset(&dst_sin, 0, sizeof(dst_sin)); +#if HAVE_SA_LEN + dst_sin.sin_len = +#endif + dst_sa_len = sizeof(dst_sin); + dst_sin.sin_family = AF_INET; + dst_sin.sin_addr.s_addr = dst_addr->ip4.addr; /* byte-order? */ + dst_sin.sin_port = htons(dst_port); + } + +#if LWIP_PROXY_DEBUG && !RT_OS_WINDOWS + { + char addrbuf[sizeof "ffff:ffff:ffff:ffff:ffff:ffff:255.255.255.255"]; + const char *addrstr; + + addrstr = inet_ntop(sdom, pdst_addr, addrbuf, sizeof(addrbuf)); + DPRINTF(("---> %s %s%s%s:%d ", + stype == SOCK_STREAM ? "TCP" : "UDP", + sdom == PF_INET6 ? "[" : "", + addrstr, + sdom == PF_INET6 ? 
"]" : "", + dst_port)); + } +#endif + + s = proxy_create_socket(sdom, stype); + if (s == INVALID_SOCKET) { + return INVALID_SOCKET; + } + DPRINTF(("socket %d\n", s)); + + /* TODO: needs locking if dynamic modifyvm is allowed */ + if (sdom == PF_INET6) { + psrc_sa = (const struct sockaddr *)g_proxy_options->src6; + src_sa_len = sizeof(struct sockaddr_in6); + } + else { + psrc_sa = (const struct sockaddr *)g_proxy_options->src4; + src_sa_len = sizeof(struct sockaddr_in); + } + if (psrc_sa != NULL) { + status = bind(s, psrc_sa, src_sa_len); + if (status == SOCKET_ERROR) { + DPRINTF(("socket %d: bind: %s\n", s, strerror(errno))); + closesocket(s); + return INVALID_SOCKET; + } + } + + status = connect(s, pdst_sa, dst_sa_len); + if (status == SOCKET_ERROR && errno != EINPROGRESS) { + DPRINTF(("socket %d: connect: %s\n", s, strerror(errno))); + closesocket(s); + return INVALID_SOCKET; + } + + return s; +} + + +/** + * Create a socket for inbound (port-forwarded) connections to + * src_addr (port is part of sockaddr, so not a separate argument). + * + * The socket is non-blocking and TCP sockets has SIGPIPE disabled if + * possible. On Linux it's not possible and should be disabled for + * each send(2) individually. + * + * TODO?: Support v6-mapped v4 so that user can specify she wants + * "udp" and get both versions? + */ +SOCKET +proxy_bound_socket(int sdom, int stype, struct sockaddr *src_addr) +{ + SOCKET s; + int on; + const socklen_t onlen = sizeof(on); + int status; + + s = proxy_create_socket(sdom, stype); + if (s == INVALID_SOCKET) { + return INVALID_SOCKET; + } + DPRINTF(("socket %d\n", s)); + + on = 1; + status = setsockopt(s, SOL_SOCKET, SO_REUSEADDR, (char *)&on, onlen); + if (status < 0) { /* not good, but not fatal */ + warn("SO_REUSEADDR"); + } + + status = bind(s, src_addr, + sdom == PF_INET ? 
+ sizeof(struct sockaddr_in) + : sizeof(struct sockaddr_in6)); + if (status < 0) { + perror("bind"); + closesocket(s); + return INVALID_SOCKET; + } + + if (stype == SOCK_STREAM) { + status = listen(s, 5); + if (status < 0) { + perror("listen"); + closesocket(s); + return INVALID_SOCKET; + } + } + + return s; +} + + +void +proxy_reset_socket(SOCKET s) +{ + struct linger linger; + + linger.l_onoff = 1; + linger.l_linger = 0; + + /* On Windows we can run into issue here, perhaps SO_LINGER isn't enough, and + * we should use WSA{Send,Recv}Disconnect instead. + * + * Links for the reference: + * http://msdn.microsoft.com/en-us/library/windows/desktop/ms738547%28v=vs.85%29.aspx + * http://bugs.sun.com/bugdatabase/view_bug.do?bug_id=4468997 + */ + setsockopt(s, SOL_SOCKET, SO_LINGER, (char *)&linger, sizeof(linger)); + + closesocket(s); +} + + +int +proxy_sendto(SOCKET sock, struct pbuf *p, void *name, size_t namelen) +{ + struct pbuf *q; + size_t i, clen; +#ifndef RT_OS_WINDOWS + struct msghdr mh; + ssize_t nsent; +#else + DWORD nsent; + int rc; +#endif + IOVEC fixiov[8]; /* fixed size (typical case) */ + const size_t fixiovsize = sizeof(fixiov)/sizeof(fixiov[0]); + IOVEC *dyniov; /* dynamically sized */ + IOVEC *iov; + int error = 0; + + /* + * Static iov[] is usually enough since UDP protocols use small + * datagrams to avoid fragmentation, but be prepared. 
+ */ + clen = pbuf_clen(p); + if (clen > fixiovsize) { + /* + * XXX: TODO: check that clen is shorter than IOV_MAX + */ + dyniov = (IOVEC *)malloc(clen * sizeof(*dyniov)); + if (dyniov == NULL) { + error = -errno; + goto out; + } + iov = dyniov; + } + else { + dyniov = NULL; + iov = fixiov; + } + + + for (q = p, i = 0; i < clen; q = q->next, ++i) { + LWIP_ASSERT1(q != NULL); + + IOVEC_SET_BASE(iov[i], q->payload); + IOVEC_SET_LEN(iov[i], q->len); + } + +#ifndef RT_OS_WINDOWS + memset(&mh, 0, sizeof(mh)); + mh.msg_name = name; + mh.msg_namelen = namelen; + mh.msg_iov = iov; + mh.msg_iovlen = clen; + + nsent = sendmsg(sock, &mh, 0); + if (nsent < 0) { + error = -errno; + DPRINTF(("%s: fd %d: sendmsg errno %d\n", + __func__, sock, errno)); + } +#else + rc = WSASendTo(sock, iov, (DWORD)clen, &nsent, 0, + name, (int)namelen, NULL, NULL); + if (rc == SOCKET_ERROR) { + DPRINTF(("%s: fd %d: sendmsg errno %d\n", + __func__, sock, WSAGetLastError())); + error = -WSAGetLastError(); + } +#endif + + out: + if (dyniov != NULL) { + free(dyniov); + } + return error; +} + + +static const char *lwiperr[] = { + "ERR_OK", + "ERR_MEM", + "ERR_BUF", + "ERR_TIMEOUT", + "ERR_RTE", + "ERR_INPROGRESS", + "ERR_VAL", + "ERR_WOULDBLOCK", + "ERR_USE", + "ERR_ISCONN", + "ERR_ABRT", + "ERR_RST", + "ERR_CLSD", + "ERR_CONN", + "ERR_ARG", + "ERR_IF" +}; + + +const char * +proxy_lwip_strerr(err_t error) +{ + static char buf[32]; + int e = -error; + + if (0 < e || e < (int)__arraycount(lwiperr)) { + return lwiperr[e]; + } + else { + RTStrPrintf(buf, sizeof(buf), "unknown error %d", error); + return buf; + } +} diff --git a/src/VBox/NetworkServices/NAT/proxy.h b/src/VBox/NetworkServices/NAT/proxy.h new file mode 100644 index 00000000..20df34b6 --- /dev/null +++ b/src/VBox/NetworkServices/NAT/proxy.h @@ -0,0 +1,101 @@ +#ifndef _nat_proxy_h_ +#define _nat_proxy_h_ + +#if !defined(VBOX) +#include "vbox-compat.h" +#endif + +#include "lwip/err.h" +#include "lwip/ip_addr.h" +#include "winutils.h" + +/* 
forward */ +struct netif; +struct tcpip_msg; +struct pbuf; +struct sockaddr; +struct sockaddr_in; +struct sockaddr_in6; + +struct ip4_lomap +{ + ip_addr_t loaddr; + uint32_t off; +}; + +struct ip4_lomap_desc +{ + const struct ip4_lomap *lomap; + unsigned int num_lomap; +}; + +struct proxy_options { + int ipv6_enabled; + int ipv6_defroute; + SOCKET icmpsock4; + SOCKET icmpsock6; + const char *tftp_root; + const struct sockaddr_in *src4; + const struct sockaddr_in6 *src6; + const struct ip4_lomap_desc *lomap_desc; + const char **nameservers; +}; + +extern volatile struct proxy_options *g_proxy_options; +extern struct netif *g_proxy_netif; + +void proxy_init(struct netif *, struct proxy_options *); +SOCKET proxy_connected_socket(int, int, ipX_addr_t *, u16_t); +SOCKET proxy_bound_socket(int, int, struct sockaddr *); +void proxy_reset_socket(SOCKET); +int proxy_sendto(SOCKET, struct pbuf *, void *, size_t); +void proxy_lwip_post(struct tcpip_msg *); +const char *proxy_lwip_strerr(err_t); + +/* proxy_rtadvd.c */ +void proxy_rtadvd_start(struct netif *); +void proxy_rtadvd_do_quick(void *); + +/* rtmon_*.c */ +int rtmon_get_defaults(void); + +/* proxy_dhcp6ds.c */ +err_t dhcp6ds_init(struct netif *); + +/* proxy_tftpd.c */ +err_t tftpd_init(struct netif *, const char *); + +/* pxtcp.c */ +void pxtcp_init(void); + +/* pxudp.c */ +void pxudp_init(void); + +/* pxdns.c */ +err_t pxdns_init(struct netif *); +void pxdns_set_nameservers(void *); + +/* pxping.c */ +err_t pxping_init(struct netif *, SOCKET, SOCKET); + + +#if defined(RT_OS_LINUX) || defined(RT_OS_SOLARIS) || defined(RT_OS_WINDOWS) +# define HAVE_SA_LEN 0 +#else +# define HAVE_SA_LEN 1 +#endif + +#define LWIP_ASSERT1(condition) LWIP_ASSERT(#condition, condition) +/* TODO: review debug levels and types */ +#if !LWIP_PROXY_DEBUG +# define DPRINTF_LEVEL(y, x) do {} while (0) +#else +# define DPRINTF_LEVEL(level, x) do { LWIP_DEBUGF(LWIP_PROXY_DEBUG | (level), x); } while (0) +#endif + +#define DPRINTF(x) 
DPRINTF_LEVEL(0, x) +#define DPRINTF0(x) DPRINTF_LEVEL(LWIP_DBG_LEVEL_WARNING, x) +#define DPRINTF1(x) DPRINTF_LEVEL(LWIP_DBG_LEVEL_SERIOUS, x) +#define DPRINTF2(x) DPRINTF_LEVEL(LWIP_DBG_LEVEL_SEVERE, x) + +#endif /* _nat_proxy_h_ */ diff --git a/src/VBox/NetworkServices/NAT/proxy_dhcp6ds.c b/src/VBox/NetworkServices/NAT/proxy_dhcp6ds.c new file mode 100644 index 00000000..2eebeef5 --- /dev/null +++ b/src/VBox/NetworkServices/NAT/proxy_dhcp6ds.c @@ -0,0 +1,301 @@ +/* -*- indent-tabs-mode: nil; -*- */ +/** + * Simple stateless DHCPv6 (RFC 3736) server. + */ +#include "winutils.h" +#include "dhcp6.h" +#include "proxy.h" + +#include <string.h> + +#include "lwip/opt.h" +#include "lwip/mld6.h" +#include "lwip/udp.h" + + +static void dhcp6ds_recv(void *, struct udp_pcb *, struct pbuf *, ip6_addr_t *, u16_t); + + +/* ff02::1:2 - "All_DHCP_Relay_Agents_and_Servers" link-scoped multicast */ +static /* const */ ip6_addr_t all_dhcp_relays_and_servers = { + { PP_HTONL(0xff020000UL), 0, 0, PP_HTONL(0x00010002UL) } +}; + +/* ff05::1:3 - "All_DHCP_Servers" site-scoped multicast */ +static /* const */ ip6_addr_t all_dhcp_servers = { + { PP_HTONL(0xff050000UL), 0, 0, PP_HTONL(0x00010003UL) } +}; + + +static struct udp_pcb *dhcp6ds_pcb; + +/* prebuilt Server ID option */ +#define DUID_LL_LEN (/* duid type */ 2 + /* hw type */ 2 + /* ether addr */ 6) +static u8_t dhcp6ds_serverid[/* opt */ 2 + /* optlen */ 2 + DUID_LL_LEN]; + +/* prebuilt DNS Servers option */ +static u8_t dhcp6ds_dns[/* opt */ 2 + /* optlen */ 2 + /* IPv6 addr */ 16]; + + +/** + * Initialize DHCP6 server. + * + * Join DHCP6 multicast groups. + * Create and bind server pcb. + * Prebuild fixed parts of reply. 
+ */ +err_t +dhcp6ds_init(struct netif *proxy_netif) +{ + ip6_addr_t *pxaddr, *pxaddr_nonlocal; + int i; + err_t error; + + LWIP_ASSERT1(proxy_netif != NULL); + LWIP_ASSERT1(proxy_netif->hwaddr_len == 6); /* ethernet */ + + pxaddr = netif_ip6_addr(proxy_netif, 0); /* link local */ + + /* + * XXX: TODO: This is a leftover from testing with IPv6 mapped + * loopback with a special IPv6->IPv4 mapping hack in pxudp.c + */ + /* advertise ourself as DNS resolver - will be proxied to host */ + pxaddr_nonlocal = NULL; + for (i = 1; i < LWIP_IPV6_NUM_ADDRESSES; ++i) { + if (ip6_addr_ispreferred(netif_ip6_addr_state(proxy_netif, i)) + && !ip6_addr_islinklocal(netif_ip6_addr(proxy_netif, i))) + { + pxaddr_nonlocal = netif_ip6_addr(proxy_netif, i); + break; + } + } + LWIP_ASSERT1(pxaddr_nonlocal != NULL); /* must be configured on the netif */ + + + error = mld6_joingroup(pxaddr, &all_dhcp_relays_and_servers); + if (error != ERR_OK) { + DPRINTF0(("%s: failed to join All_DHCP_Relay_Agents_and_Servers: %s\n", + __func__, proxy_lwip_strerr(error))); + goto err; + } + + error = mld6_joingroup(pxaddr, &all_dhcp_servers); + if (error != ERR_OK) { + DPRINTF0(("%s: failed to join All_DHCP_Servers: %s\n", + __func__, proxy_lwip_strerr(error))); + goto err1; + } + + + dhcp6ds_pcb = udp_new_ip6(); + if (dhcp6ds_pcb == NULL) { + DPRINTF0(("%s: failed to allocate PCB\n", __func__)); + error = ERR_MEM; + goto err2; + } + + udp_recv_ip6(dhcp6ds_pcb, dhcp6ds_recv, NULL); + + error = udp_bind_ip6(dhcp6ds_pcb, pxaddr, DHCP6_SERVER_PORT); + if (error != ERR_OK) { + DPRINTF0(("%s: failed to bind PCB\n", __func__)); + goto err3; + } + + +#define OPT_SET(buf, off, c) do { \ + u16_t _s = PP_HTONS(c); \ + memcpy(&(buf)[off], &_s, sizeof(u16_t)); \ + } while (0) + +#define SERVERID_SET(off, c) OPT_SET(dhcp6ds_serverid, (off), (c)) +#define DNSSRV_SET(off, c) OPT_SET(dhcp6ds_dns, (off), (c)) + + SERVERID_SET(0, DHCP6_OPTION_SERVERID); + SERVERID_SET(2, DUID_LL_LEN); + SERVERID_SET(4, DHCP6_DUID_LL); + 
SERVERID_SET(6, ARES_HRD_ETHERNET); + memcpy(&dhcp6ds_serverid[8], proxy_netif->hwaddr, 6); + + DNSSRV_SET(0, DHCP6_OPTION_DNS_SERVERS); + DNSSRV_SET(2, 16); /* one IPv6 address */ + /* + * XXX: TODO: This is a leftover from testing with IPv6 mapped + * loopback with a special IPv6->IPv4 mapping hack in pxudp.c + */ + memcpy(&dhcp6ds_dns[4], pxaddr_nonlocal, sizeof(ip6_addr_t)); + +#undef SERVERID_SET +#undef DNSSRV_SET + + return ERR_OK; + + + err3: + udp_remove(dhcp6ds_pcb); + dhcp6ds_pcb = NULL; + err2: + mld6_leavegroup(pxaddr, &all_dhcp_servers); + err1: + mld6_leavegroup(pxaddr, &all_dhcp_relays_and_servers); + err: + return error; +} + + +static u8_t dhcp6ds_reply_buf[1024]; + +static void +dhcp6ds_recv(void *arg, struct udp_pcb *pcb, struct pbuf *p, + ip6_addr_t *addr, u16_t port) +{ + u8_t msg_header[4]; + unsigned int msg_type, msg_tid; + int copied; + size_t roff; + struct pbuf *q; + err_t error; + + LWIP_UNUSED_ARG(arg); + LWIP_ASSERT1(p != NULL); + + copied = pbuf_copy_partial(p, msg_header, sizeof(msg_header), 0); + if (copied != sizeof(msg_header)) { + DPRINTF(("%s: message header truncated\n", __func__)); + pbuf_free(p); + return; + } + pbuf_header(p, -(s16_t)sizeof(msg_header)); + + msg_type = msg_header[0]; + msg_tid = (msg_header[1] << 16) | (msg_header[2] << 8) | msg_header[3]; + DPRINTF(("%s: type %u, tid 0x%6x\n", __func__, msg_type, msg_tid)); + if (msg_type != DHCP6_INFORMATION_REQUEST) { /* TODO:? 
RELAY_FORW */ + pbuf_free(p); + return; + } + + roff = 0; + + msg_header[0] = DHCP6_REPLY; + memcpy(dhcp6ds_reply_buf + roff, msg_header, sizeof(msg_header)); + roff += sizeof(msg_header); + + + /* loop over options */ + while (p->tot_len > 0) { + u16_t opt, optlen; + + /* fetch option code */ + copied = pbuf_copy_partial(p, &opt, sizeof(opt), 0); + if (copied != sizeof(opt)) { + DPRINTF(("%s: option header truncated\n", __func__)); + pbuf_free(p); + return; + } + pbuf_header(p, -(s16_t)sizeof(opt)); + opt = ntohs(opt); + + /* fetch option length */ + copied = pbuf_copy_partial(p, &optlen, sizeof(optlen), 0); + if (copied != sizeof(optlen)) { + DPRINTF(("%s: option %u length truncated\n", __func__, opt)); + pbuf_free(p); + return; + } + pbuf_header(p, -(s16_t)sizeof(optlen)); + optlen = ntohs(optlen); + + /* enough data? */ + if (optlen > p->tot_len) { + DPRINTF(("%s: option %u truncated: expect %u, got %u\n", + __func__, opt, optlen, p->tot_len)); + pbuf_free(p); + return; + } + + DPRINTF2(("%s: option %u length %u\n", __func__, opt, optlen)); + + if (opt == DHCP6_OPTION_CLIENTID) { + u16_t s; + + /* "A DUID can be no more than 128 octets long (not + including the type code)." 
*/ + if (optlen > 130) { + DPRINTF(("%s: client DUID too long: %u\n", __func__, optlen)); + pbuf_free(p); + return; + } + + s = PP_HTONS(DHCP6_OPTION_CLIENTID); + memcpy(dhcp6ds_reply_buf + roff, &s, sizeof(s)); + roff += sizeof(s); + + s = ntohs(optlen); + memcpy(dhcp6ds_reply_buf + roff, &s, sizeof(s)); + roff += sizeof(s); + + pbuf_copy_partial(p, dhcp6ds_reply_buf + roff, optlen, 0); + roff += optlen; + } + else if (opt == DHCP6_OPTION_ORO) { + u16_t *opts; + int i, nopts; + + if (optlen % 2 != 0) { + DPRINTF2(("%s: Option Request of odd length\n", __func__)); + goto bad_oro; + } + nopts = optlen / 2; + + opts = (u16_t *)malloc(optlen); + if (opts == NULL) { + DPRINTF2(("%s: failed to allocate space for Option Request\n", + __func__)); + goto bad_oro; + } + + pbuf_copy_partial(p, opts, optlen, 0); + for (i = 0; i < nopts; ++i) { + opt = ntohs(opts[i]); + DPRINTF2(("> request option %u\n", opt)); + }; + free(opts); + + bad_oro: /* empty */; + } + + pbuf_header(p, -optlen); /* go to next option */ + } + pbuf_free(p); /* done */ + + + memcpy(dhcp6ds_reply_buf + roff, dhcp6ds_serverid, sizeof(dhcp6ds_serverid)); + roff += sizeof(dhcp6ds_serverid); + + memcpy(dhcp6ds_reply_buf + roff, dhcp6ds_dns, sizeof(dhcp6ds_dns)); + roff += sizeof(dhcp6ds_dns); + + q = pbuf_alloc(PBUF_RAW, roff, PBUF_RAM); + if (q == NULL) { + DPRINTF(("%s: pbuf_alloc(%d) failed\n", __func__, (int)roff)); + return; + } + + error = pbuf_take(q, dhcp6ds_reply_buf, roff); + if (error != ERR_OK) { + DPRINTF(("%s: pbuf_take(%d) failed: %s\n", + __func__, (int)roff, proxy_lwip_strerr(error))); + pbuf_free(q); + return; + } + + error = udp_sendto_ip6(pcb, q, addr, port); + if (error != ERR_OK) { + DPRINTF(("%s: udp_sendto failed: %s\n", + __func__, proxy_lwip_strerr(error))); + } + + pbuf_free(q); +} diff --git a/src/VBox/NetworkServices/NAT/proxy_pollmgr.c b/src/VBox/NetworkServices/NAT/proxy_pollmgr.c new file mode 100644 index 00000000..054eceb6 --- /dev/null +++ 
b/src/VBox/NetworkServices/NAT/proxy_pollmgr.c @@ -0,0 +1,658 @@ +/* -*- indent-tabs-mode: nil; -*- */ +#include "winutils.h" + +#include "proxy_pollmgr.h" +#include "proxy.h" + +#ifndef RT_OS_WINDOWS +#include <sys/socket.h> +#include <netinet/in.h> +#include <err.h> +#include <errno.h> +#include <poll.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <time.h> +#include <unistd.h> +#else +#include <iprt/err.h> +#include <stdlib.h> +#include <string.h> +#include "winpoll.h" +#endif + +#define POLLMGR_GARBAGE (-1) + +struct pollmgr { + struct pollfd *fds; + struct pollmgr_handler **handlers; + nfds_t capacity; /* allocated size of the arrays */ + nfds_t nfds; /* part of the arrays in use */ + + /* channels (socketpair) for static slots */ + SOCKET chan[POLLMGR_SLOT_STATIC_COUNT][2]; +#define POLLMGR_CHFD_RD 0 /* - pollmgr side */ +#define POLLMGR_CHFD_WR 1 /* - client side */ +} pollmgr; + + +static void pollmgr_loop(void); + +static void pollmgr_add_at(int, struct pollmgr_handler *, SOCKET, int); +static void pollmgr_refptr_delete(struct pollmgr_refptr *); + + +/* + * We cannot portably peek at the length of the incoming datagram and + * pre-allocate pbuf chain to recvmsg() directly to it. On Linux it's + * possible to recv with MSG_PEEK|MSG_TRUC, but extra syscall is + * probably more expensive (haven't measured) than doing an extra copy + * of data, since typical UDP datagrams are small enough to avoid + * fragmentation. + * + * We can use shared buffer here since we read from sockets + * sequentially in a loop over pollfd. 
+ */ +u8_t pollmgr_udpbuf[64 * 1024]; + + +int +pollmgr_init(void) +{ + struct pollfd *newfds; + struct pollmgr_handler **newhdls; + nfds_t newcap; + int status; + nfds_t i; + + pollmgr.fds = NULL; + pollmgr.handlers = NULL; + pollmgr.capacity = 0; + pollmgr.nfds = 0; + + for (i = 0; i < POLLMGR_SLOT_STATIC_COUNT; ++i) { + pollmgr.chan[i][POLLMGR_CHFD_RD] = -1; + pollmgr.chan[i][POLLMGR_CHFD_WR] = -1; + } + + for (i = 0; i < POLLMGR_SLOT_STATIC_COUNT; ++i) { +#ifndef RT_OS_WINDOWS + status = socketpair(PF_LOCAL, SOCK_DGRAM, 0, pollmgr.chan[i]); + if (status < 0) { + perror("socketpair"); + goto cleanup_close; + } +#else + status = RTWinSocketPair(PF_INET, SOCK_DGRAM, 0, pollmgr.chan[i]); + AssertRCReturn(status, -1); + + if (RT_FAILURE(status)) { + perror("socketpair"); + goto cleanup_close; + } +#endif + } + + + newcap = 16; /* XXX: magic */ + LWIP_ASSERT1(newcap >= POLLMGR_SLOT_STATIC_COUNT); + + newfds = (struct pollfd *) + malloc(newcap * sizeof(*pollmgr.fds)); + if (newfds == NULL) { + perror("calloc"); + goto cleanup_close; + } + + newhdls = (struct pollmgr_handler **) + malloc(newcap * sizeof(*pollmgr.handlers)); + if (newhdls == NULL) { + perror("malloc"); + free(newfds); + goto cleanup_close; + } + + pollmgr.capacity = newcap; + pollmgr.fds = newfds; + pollmgr.handlers = newhdls; + + pollmgr.nfds = POLLMGR_SLOT_STATIC_COUNT; + + for (i = 0; i < pollmgr.capacity; ++i) { + pollmgr.fds[i].fd = INVALID_SOCKET; + pollmgr.fds[i].events = 0; + pollmgr.fds[i].revents = 0; + } + + return 0; + + cleanup_close: + for (i = 0; i < POLLMGR_SLOT_STATIC_COUNT; ++i) { + SOCKET *chan = pollmgr.chan[i]; + if (chan[POLLMGR_CHFD_RD] >= 0) { + closesocket(chan[POLLMGR_CHFD_RD]); + closesocket(chan[POLLMGR_CHFD_WR]); + } + } + + return -1; +} + + +/* + * Must be called before pollmgr loop is started, so no locking. 
+ */ +SOCKET +pollmgr_add_chan(int slot, struct pollmgr_handler *handler) +{ + if (slot >= POLLMGR_SLOT_FIRST_DYNAMIC) { + handler->slot = -1; + return -1; + } + + pollmgr_add_at(slot, handler, pollmgr.chan[slot][POLLMGR_CHFD_RD], POLLIN); + return pollmgr.chan[slot][POLLMGR_CHFD_WR]; +} + + +/* + * Must be called from pollmgr loop (via callbacks), so no locking. + */ +int +pollmgr_add(struct pollmgr_handler *handler, SOCKET fd, int events) +{ + int slot; + + DPRINTF2(("%s: new fd %d\n", __func__, fd)); + + if (pollmgr.nfds == pollmgr.capacity) { + struct pollfd *newfds; + struct pollmgr_handler **newhdls; + nfds_t newcap; + nfds_t i; + + newcap = pollmgr.capacity * 2; + + newfds = (struct pollfd *) + realloc(pollmgr.fds, newcap * sizeof(*pollmgr.fds)); + if (newfds == NULL) { + perror("realloc"); + handler->slot = -1; + return -1; + } + + pollmgr.fds = newfds; /* don't crash/leak if realloc(handlers) fails */ + /* but don't update capacity yet! */ + + newhdls = (struct pollmgr_handler **) + realloc(pollmgr.handlers, newcap * sizeof(*pollmgr.handlers)); + if (newhdls == NULL) { + perror("realloc"); + /* if we failed to realloc here, then fds points to the + * new array, but we pretend we still has old capacity */ + handler->slot = -1; + return -1; + } + + pollmgr.handlers = newhdls; + pollmgr.capacity = newcap; + + for (i = pollmgr.nfds; i < newcap; ++i) { + newfds[i].fd = INVALID_SOCKET; + newfds[i].events = 0; + newfds[i].revents = 0; + newhdls[i] = NULL; + } + } + + slot = pollmgr.nfds; + ++pollmgr.nfds; + + pollmgr_add_at(slot, handler, fd, events); + return slot; +} + + +static void +pollmgr_add_at(int slot, struct pollmgr_handler *handler, SOCKET fd, int events) +{ + pollmgr.fds[slot].fd = fd; + pollmgr.fds[slot].events = events; + pollmgr.fds[slot].revents = 0; + pollmgr.handlers[slot] = handler; + + handler->slot = slot; +} + + +ssize_t +pollmgr_chan_send(int slot, void *buf, size_t nbytes) +{ + SOCKET fd; + ssize_t nsent; + + if (slot >= 
POLLMGR_SLOT_FIRST_DYNAMIC) { + return -1; + } + + fd = pollmgr.chan[slot][POLLMGR_CHFD_WR]; + nsent = send(fd, buf, (int)nbytes, 0); + if (nsent == SOCKET_ERROR) { + warn("send on chan %d", slot); + return -1; + } + else if ((size_t)nsent != nbytes) { + warnx("send on chan %d: datagram truncated to %u bytes", + slot, (unsigned int)nsent); + return -1; + } + + return nsent; +} + + +/** + * Receive a pointer sent over poll manager channel. + */ +void * +pollmgr_chan_recv_ptr(struct pollmgr_handler *handler, SOCKET fd, int revents) +{ + void *ptr; + ssize_t nread; + + if (revents & POLLNVAL) { + errx(EXIT_FAILURE, "chan %d: fd invalid", (int)handler->slot); + /* NOTREACHED */ + } + + if (revents & (POLLERR | POLLHUP)) { + errx(EXIT_FAILURE, "chan %d: fd error", (int)handler->slot); + /* NOTREACHED */ + } + + LWIP_ASSERT1(revents & POLLIN); + nread = recv(fd, (char *)&ptr, sizeof(ptr), 0); + + if (nread == SOCKET_ERROR) { + err(EXIT_FAILURE, "chan %d: recv", (int)handler->slot); + /* NOTREACHED */ + } + if (nread != sizeof(ptr)) { + errx(EXIT_FAILURE, "chan %d: recv: read %d bytes", + (int)handler->slot, (int)nread); + /* NOTREACHED */ + } + + return ptr; +} + + +void +pollmgr_update_events(int slot, int events) +{ + LWIP_ASSERT1(slot >= POLLMGR_SLOT_FIRST_DYNAMIC); + LWIP_ASSERT1((nfds_t)slot < pollmgr.nfds); + + pollmgr.fds[slot].events = events; +} + + +void +pollmgr_del_slot(int slot) +{ + LWIP_ASSERT1(slot >= POLLMGR_SLOT_FIRST_DYNAMIC); + + DPRINTF2(("%s(%d): fd %d ! 
/**
 * Thread entry point for the poll manager.
 *
 * The argument is unused; the function just runs pollmgr_loop().
 */
void
pollmgr_thread(void *ignored)
{
    LWIP_UNUSED_ARG(ignored);
    pollmgr_loop();
}


/**
 * Poll manager main loop.
 *
 * Each iteration waits for events on pollmgr.fds, calls the handler
 * callback of every slot that has pending events, and then compacts
 * the fds/handlers arrays to get rid of the dynamic slots that were
 * deleted during that pass.
 *
 * A callback returns the new "events" mask for its slot (>= 0) or a
 * negative value to ask for the slot to be deleted.
 *
 * The only ways out of the for (;;) loop are the err() calls on poll
 * failure.
 */
static void
pollmgr_loop(void)
{
    int nready;
    SOCKET delfirst;            /* head of the list of garbage slots */
    SOCKET *pdelprev;           /* where to store the next garbage index */
    int i;

    for (;;) {
#ifndef RT_OS_WINDOWS
        nready = poll(pollmgr.fds, pollmgr.nfds, -1);
#else
        int rc = RTWinPoll(pollmgr.fds, pollmgr.nfds,RT_INDEFINITE_WAIT, &nready);
        if (RT_FAILURE(rc)) {
            err(EXIT_FAILURE, "poll"); /* XXX: what to do on error? */
            /* NOTREACHED*/
        }
#endif

        DPRINTF2(("%s: ready %d fd%s\n",
                  __func__, nready, (nready == 1 ? "" : "s")));

        if (nready < 0) {
            if (errno == EINTR) {
                continue;
            }

            err(EXIT_FAILURE, "poll"); /* XXX: what to do on error? */
            /* NOTREACHED*/
        }
        else if (nready == 0) { /* cannot happen, we wait forever (-1) */
            continue;           /* - but be defensive */
        }


        /* start this pass with an empty garbage list */
        delfirst = INVALID_SOCKET;
        pdelprev = &delfirst;

        /* the scan stops early once all ready slots are accounted for */
        for (i = 0; (nfds_t)i < pollmgr.nfds && nready > 0; ++i) {
            struct pollmgr_handler *handler;
            SOCKET fd;
            int revents, nevents;

            fd = pollmgr.fds[i].fd;
            revents = pollmgr.fds[i].revents;

            /*
             * Channel handlers can request deletion of dynamic slots
             * by calling pollmgr_del_slot() that clobbers slot's fd.
             */
            if (fd == INVALID_SOCKET && i >= POLLMGR_SLOT_FIRST_DYNAMIC) {
                /* adjust count if events were pending for that slot */
                if (revents != 0) {
                    --nready;
                }

                /* pretend that slot handler requested deletion */
                nevents = -1;
                goto update_events;
            }

            if (revents == 0) {
                continue; /* next fd */
            }
            --nready;

            handler = pollmgr.handlers[i];

            if (handler != NULL && handler->callback != NULL) {
#if LWIP_PROXY_DEBUG /* DEBUG */
                if (i < POLLMGR_SLOT_FIRST_DYNAMIC) {
                    if (revents == POLLIN) {
                        DPRINTF2(("%s: ch %d\n", __func__, i));
                    }
                    else {
                        DPRINTF2(("%s: ch %d @ revents 0x%x!\n",
                                  __func__, i, revents));
                    }
                }
                else {
                    DPRINTF2(("%s: fd %d @ revents 0x%x\n",
                              __func__, fd, revents));
                }
#endif /* DEBUG */
                nevents = (*handler->callback)(handler, fd, revents);
            }
            else {
                /* a slot without a usable callback is treated as deleted */
                DPRINTF0(("%s: invalid handler for fd %d: ", __func__, fd));
                if (handler == NULL) {
                    DPRINTF0(("NULL\n"));
                }
                else {
                    DPRINTF0(("%p (callback = NULL)\n", (void *)handler));
                }
                nevents = -1; /* delete it */
            }

          update_events:
            if (nevents >= 0) {
                /* the callback's return value becomes the new events mask */
                if (nevents != pollmgr.fds[i].events) {
                    DPRINTF2(("%s: fd %d ! nevents 0x%x\n",
                              __func__, fd, nevents));
                }
                pollmgr.fds[i].events = nevents;
            }
            else if (i < POLLMGR_SLOT_FIRST_DYNAMIC) {
                /* Don't garbage-collect channels. */
                DPRINTF2(("%s: fd %d ! DELETED (channel %d)\n",
                          __func__, fd, i));
                pollmgr.fds[i].fd = INVALID_SOCKET;
                pollmgr.fds[i].events = 0;
                pollmgr.fds[i].revents = 0;
                pollmgr.handlers[i] = NULL;
            }
            else {
                DPRINTF2(("%s: fd %d ! DELETED\n", __func__, fd));

                /* schedule for deletion (see g/c loop for details) */
                *pdelprev = i; /* make previous entry point to us */
                pdelprev = &pollmgr.fds[i].fd;

                pollmgr.fds[i].fd = INVALID_SOCKET; /* end of list (for now) */
                pollmgr.fds[i].events = POLLMGR_GARBAGE;
                pollmgr.fds[i].revents = 0;
                pollmgr.handlers[i] = NULL;
            }
        } /* processing loop */


        /*
         * Garbage collect and compact the array.
         *
         * We overload pollfd::fd of garbage entries to store the
         * index of the next garbage entry.  The garbage list is
         * co-directional with the fds array.  The index of the first
         * entry is in "delfirst", the last entry "points to"
         * INVALID_SOCKET.
         *
         * See update_events code for nevents < 0 at the end of the
         * processing loop above.
         */
        while (delfirst != INVALID_SOCKET) {
            const int last = pollmgr.nfds - 1;

            /*
             * We want a live entry in the last slot to swap into the
             * freed slot, so make sure we have one.
             */
            if (pollmgr.fds[last].events == POLLMGR_GARBAGE /* garbage */
                || pollmgr.fds[last].fd == INVALID_SOCKET)  /* or killed */
            {
                /* drop garbage entry at the end of the array */
                --pollmgr.nfds;

                if (delfirst == last) {
                    /* congruent to delnext >= pollmgr.nfds test below */
                    delfirst = INVALID_SOCKET; /* done */
                }
            }
            else {
                /* remember the link before the struct copy overwrites it */
                const SOCKET delnext = pollmgr.fds[delfirst].fd;

                /* copy live entry at the end to the first slot being freed */
                pollmgr.fds[delfirst] = pollmgr.fds[last]; /* struct copy */
                pollmgr.handlers[delfirst] = pollmgr.handlers[last];
                /* tell the moved handler which slot it lives in now */
                pollmgr.handlers[delfirst]->slot = (int)delfirst;
                --pollmgr.nfds;

                if ((nfds_t)delnext >= pollmgr.nfds) {
                    delfirst = INVALID_SOCKET; /* done */
                }
                else {
                    delfirst = delnext;
                }
            }

            /* in both cases the former last slot is now unused: reset it */
            pollmgr.fds[last].fd = INVALID_SOCKET;
            pollmgr.fds[last].events = 0;
            pollmgr.fds[last].revents = 0;
            pollmgr.handlers[last] = NULL;
        }
    } /* poll loop */
}
+ */ +struct pollmgr_refptr * +pollmgr_refptr_create(struct pollmgr_handler *ptr) +{ + struct pollmgr_refptr *rp; + + LWIP_ASSERT1(ptr != NULL); + + rp = (struct pollmgr_refptr *)malloc(sizeof (*rp)); + if (rp == NULL) { + return NULL; + } + + sys_mutex_new(&rp->lock); + rp->ptr = ptr; + rp->strong = 1; + rp->weak = 0; + + return rp; +} + + +static void +pollmgr_refptr_delete(struct pollmgr_refptr *rp) +{ + if (rp == NULL) { + return; + } + + LWIP_ASSERT1(rp->strong == 0); + LWIP_ASSERT1(rp->weak == 0); + + sys_mutex_free(&rp->lock); + free(rp); +} + + +/** + * Add weak reference before "rp" is sent over a poll manager channel. + */ +void +pollmgr_refptr_weak_ref(struct pollmgr_refptr *rp) +{ + sys_mutex_lock(&rp->lock); + + LWIP_ASSERT1(rp->ptr != NULL); + LWIP_ASSERT1(rp->strong > 0); + + ++rp->weak; + + sys_mutex_unlock(&rp->lock); +} + + +/** + * Try to get the pointer from implicitely weak reference we've got + * from a channel. + * + * If we detect that the object is still strongly referenced, but no + * longer registered with the poll manager we abort strengthening + * conversion here b/c lwip thread callback is already scheduled to + * destruct the object. + */ +struct pollmgr_handler * +pollmgr_refptr_get(struct pollmgr_refptr *rp) +{ + struct pollmgr_handler *handler; + size_t weak; + + sys_mutex_lock(&rp->lock); + + LWIP_ASSERT1(rp->weak > 0); + weak = --rp->weak; + + handler = rp->ptr; + if (handler == NULL) { + LWIP_ASSERT1(rp->strong == 0); + sys_mutex_unlock(&rp->lock); + if (weak == 0) { + pollmgr_refptr_delete(rp); + } + return NULL; + } + + LWIP_ASSERT1(rp->strong == 1); + + /* + * Here we woild do: + * + * ++rp->strong; + * + * and then, after channel handler is done, we would decrement it + * back. + * + * Instead we check that the object is still registered with poll + * manager. 
If it is, there's no race with lwip thread trying to + * drop its strong reference, as lwip thread callback to destruct + * the object is always scheduled by its poll manager callback. + * + * Conversly, if we detect that the object is no longer registered + * with poll manager, we immediately abort. Since channel handler + * can't do anything useful anyway and would have to return + * immediately. + * + * Since channel handler would always find rp->strong as it had + * left it, just elide extra strong reference creation to avoid + * the whole back-and-forth. + */ + + if (handler->slot < 0) { /* no longer polling */ + sys_mutex_unlock(&rp->lock); + return NULL; + } + + sys_mutex_unlock(&rp->lock); + return handler; +} + + +/** + * Remove (the only) strong reference. + * + * If it were real strong/weak pointers, we should also call + * destructor for the referenced object, but + */ +void +pollmgr_refptr_unref(struct pollmgr_refptr *rp) +{ + sys_mutex_lock(&rp->lock); + + LWIP_ASSERT1(rp->strong == 1); + --rp->strong; + + if (rp->strong > 0) { + sys_mutex_unlock(&rp->lock); + } + else { + size_t weak; + + /* void *ptr = rp->ptr; */ + rp->ptr = NULL; + + /* delete ptr; // see doc comment */ + + weak = rp->weak; + sys_mutex_unlock(&rp->lock); + if (weak == 0) { + pollmgr_refptr_delete(rp); + } + } +} diff --git a/src/VBox/NetworkServices/NAT/proxy_pollmgr.h b/src/VBox/NetworkServices/NAT/proxy_pollmgr.h new file mode 100644 index 00000000..68761b9c --- /dev/null +++ b/src/VBox/NetworkServices/NAT/proxy_pollmgr.h @@ -0,0 +1,67 @@ +/* -*- indent-tabs-mode: nil; -*- */ +#ifndef _PROXY_POLLMGR_H_ +#define _PROXY_POLLMGR_H_ + +#ifndef RT_OS_WINDOWS +# include <unistd.h> /* for ssize_t */ +#endif +#include "lwip/sys.h" + +enum pollmgr_slot_t { + POLLMGR_CHAN_PXTCP_ADD, /* new proxy tcp connection from guest */ + POLLMGR_CHAN_PXTCP_POLLIN, /* free space in ringbuf, may POLLIN */ + POLLMGR_CHAN_PXTCP_POLLOUT, /* schedule one-shot POLLOUT callback */ + POLLMGR_CHAN_PXTCP_DEL, 
struct pollmgr_handler;         /* forward */

/*
 * Slot callback.  Called by the poll loop with the handler itself,
 * the slot's socket and the revents reported for it.  The return
 * value is the new events mask for the slot; a negative value asks
 * the poll manager to delete the slot.
 */
typedef int (*pollmgr_callback)(struct pollmgr_handler *, SOCKET, int);

/*
 * Per-slot handler registered with the poll manager.
 */
struct pollmgr_handler {
    pollmgr_callback callback;  /* invoked when the slot has events */
    void *data;                 /* not interpreted by the poll loop */
    int slot;                   /* index in the poll manager's arrays,
                                 * updated when the slot is moved;
                                 * negative means "no longer polling" */
};

/*
 * Special-purpose strong/weak reference to a handler.  All fields are
 * accessed with "lock" held.
 */
struct pollmgr_refptr {
    struct pollmgr_handler *ptr; /* NULL once the strong ref is dropped */
    sys_mutex_t lock;
    size_t strong;              /* 1 after create, 0 after unref */
    size_t weak;                /* references in flight over channels */
};

int pollmgr_init(void);

/* static named slots (aka "channels") */
SOCKET pollmgr_add_chan(int, struct pollmgr_handler *);
ssize_t pollmgr_chan_send(int, void *buf, size_t nbytes);
void *pollmgr_chan_recv_ptr(struct pollmgr_handler *, SOCKET, int);

/* dynamic slots */
int pollmgr_add(struct pollmgr_handler *, SOCKET, int);

/* special-purpose strong/weak references */
struct pollmgr_refptr *pollmgr_refptr_create(struct pollmgr_handler *);
void pollmgr_refptr_weak_ref(struct pollmgr_refptr *);
struct pollmgr_handler *pollmgr_refptr_get(struct pollmgr_refptr *);
void pollmgr_refptr_unref(struct pollmgr_refptr *);

void pollmgr_update_events(int, int);
void pollmgr_del_slot(int);

void pollmgr_thread(void *);

/* buffer for callbacks to receive udp without worrying about truncation */
extern u8_t pollmgr_udpbuf[64 * 1024];
"lwip/inet_chksum.h" +#include "lwip/icmp6.h" +#include "lwip/nd6.h" + +#include "lwip/raw.h" + +#include <string.h> + + +static void proxy_rtadvd_timer(void *); +static void proxy_rtadvd_send_multicast(struct netif *); +static void proxy_rtadvd_fill_payload(struct netif *, int); + +static u8_t rtadvd_recv(void *, struct raw_pcb *, struct pbuf *, ip6_addr_t *); + + +/* ff02::1 - link-local all nodes multicast address */ +static ip6_addr_t allnodes_linklocal = { + { PP_HTONL(0xff020000UL), 0, 0, PP_HTONL(0x00000001UL) } +}; + + +/* + * Unsolicited Router Advertisement payload. + * + * NB: Since ICMP checksum covers pseudo-header with destination + * address (link-local allnodes multicast in this case) this payload + * cannot be used for solicited replies to unicast addresses. + */ +static unsigned int unsolicited_ra_payload_length; +static u8_t unsolicited_ra_payload[ + sizeof(struct ra_header) + /* reserves enough space for NETIF_MAX_HWADDR_LEN */ + + sizeof(struct lladdr_option) + /* we only announce one prefix */ + + sizeof(struct prefix_option) * 1 +]; + + +static int ndefaults = 0; + +static struct raw_pcb *rtadvd_pcb; + + +void +proxy_rtadvd_start(struct netif *proxy_netif) +{ +#if 0 /* XXX */ + ndefaults = rtmon_get_defaults(); +#else + ndefaults = g_proxy_options->ipv6_defroute; +#endif + if (ndefaults < 0) { + DPRINTF0(("rtadvd: failed to read IPv6 routing table, aborting\n")); + return; + } + + proxy_rtadvd_fill_payload(proxy_netif, ndefaults > 0); + + rtadvd_pcb = raw_new_ip6(IP6_NEXTH_ICMP6); + if (rtadvd_pcb == NULL) { + DPRINTF0(("rtadvd: failed to allocate pcb, aborting\n")); + return; + } + + /* + * We cannot use raw_bind_ip6() since raw_input() doesn't grok + * multicasts. We are going to use ip6_output_if() directly. 
+ */ + raw_recv_ip6(rtadvd_pcb, rtadvd_recv, proxy_netif); + + sys_timeout(3 * 1000, proxy_rtadvd_timer, proxy_netif); +} + + +static int quick_ras = 2; + + +/** + * lwIP thread callback invoked when we start/stop advertising default + * route. + */ +void +proxy_rtadvd_do_quick(void *arg) +{ + struct netif *proxy_netif = (struct netif *)arg; + + quick_ras = 2; + sys_untimeout(proxy_rtadvd_timer, proxy_netif); + proxy_rtadvd_timer(proxy_netif); /* sends and re-arms */ +} + + +static void +proxy_rtadvd_timer(void *arg) +{ + struct netif *proxy_netif = (struct netif *)arg; + int newdefs; + u32_t delay; + +#if 0 /* XXX */ + newdefs = rtmon_get_defaults(); +#else + newdefs = g_proxy_options->ipv6_defroute; +#endif + if (newdefs != ndefaults && newdefs != -1) { + ndefaults = newdefs; + proxy_rtadvd_fill_payload(proxy_netif, ndefaults > 0); + } + + proxy_rtadvd_send_multicast(proxy_netif); + + if (quick_ras > 0) { + --quick_ras; + delay = 16 * 1000; + } + else { + delay = 600 * 1000; + } + + sys_timeout(delay, proxy_rtadvd_timer, proxy_netif); +} + + +/* + * This should be folded into icmp6/nd6 input, but I don't want to + * solve this in general, making it configurable, etc. + * + * Cf. RFC 4861: + * 6.1.1. 
Validation of Router Solicitation Messages + */ +static u8_t +rtadvd_recv(void *arg, struct raw_pcb *pcb, struct pbuf *p, ip6_addr_t *addr) +{ + enum raw_recv_status { RAW_RECV_CONTINUE = 0, RAW_RECV_CONSUMED = 1 }; + + struct netif *proxy_netif = (struct netif *)arg; + struct ip6_hdr *ip6_hdr; + struct icmp6_hdr *icmp6_hdr; + struct lladdr_option *lladdr_opt; + void *option; + u8_t opttype, optlen8; + + LWIP_UNUSED_ARG(pcb); + LWIP_UNUSED_ARG(addr); + + /* save a pointer to IP6 header and skip to ICMP6 payload */ + ip6_hdr = (struct ip6_hdr *)p->payload; + pbuf_header(p, -ip_current_header_tot_len()); + + if (p->len < sizeof(struct icmp6_hdr)) { + ICMP6_STATS_INC(icmp6.lenerr); + goto drop; + } + + if (ip6_chksum_pseudo(p, IP6_NEXTH_ICMP6, p->tot_len, + ip6_current_src_addr(), + ip6_current_dest_addr()) != 0) + { + ICMP6_STATS_INC(icmp6.chkerr); + goto drop; + } + + icmp6_hdr = (struct icmp6_hdr *)p->payload; + if (icmp6_hdr->type != ICMP6_TYPE_RS) { + pbuf_header(p, ip_current_header_tot_len()); /* restore payload ptr */ + return RAW_RECV_CONTINUE; /* not interested */ + } + + /* only now that we know it's ICMP6_TYPE_RS we can check IP6 hop limit */ + if (IP6H_HOPLIM(ip6_hdr) != 255) { + ICMP6_STATS_INC(icmp6.proterr); + goto drop; + } + + /* future, backward-incompatible changes may use different Code values. 
*/ + if (icmp6_hdr->code != 0) { + ICMP6_STATS_INC(icmp6.proterr); + goto drop; + } + + /* skip past rs_header, nothing interesting in it */ + if (p->len < sizeof(struct rs_header)) { + ICMP6_STATS_INC(icmp6.lenerr); + goto drop; + } + pbuf_header(p, -(s16_t)sizeof(struct rs_header)); + + lladdr_opt = NULL; + while (p->len > 0) { + int optlen; + + if (p->len < 8) { + ICMP6_STATS_INC(icmp6.lenerr); + goto drop; + } + + option = p->payload; + opttype = ((u8_t *)option)[0]; + optlen8 = ((u8_t *)option)[1]; /* in units of 8 octets */ + + if (optlen8 == 0) { + ICMP6_STATS_INC(icmp6.proterr); + goto drop; + } + + optlen = (unsigned int)optlen8 << 3; + if (p->len < optlen) { + ICMP6_STATS_INC(icmp6.lenerr); + goto drop; + } + + if (opttype == ND6_OPTION_TYPE_SOURCE_LLADDR) { + if (lladdr_opt != NULL) { /* duplicate */ + ICMP6_STATS_INC(icmp6.proterr); + goto drop; + } + lladdr_opt = (struct lladdr_option *)option; + } + + pbuf_header(p, -optlen); + } + + if (ip6_addr_isany(ip6_current_src_addr())) { + if (lladdr_opt != NULL) { + ICMP6_STATS_INC(icmp6.proterr); + goto drop; + } + + /* reply with multicast RA */ + } + else { + /* + * XXX: Router is supposed to update its Neighbor Cache (6.2.6), + * but it's hidden inside nd6.c. 
+ */ + + /* may reply with either unicast or multicast RA */ + } + /* we just always reply with multicast RA */ + + pbuf_free(p); /* NB: this invalidates lladdr_option */ + + sys_untimeout(proxy_rtadvd_timer, proxy_netif); + proxy_rtadvd_timer(proxy_netif); /* sends and re-arms */ + + return RAW_RECV_CONSUMED; + + drop: + pbuf_free(p); + ICMP6_STATS_INC(icmp6.drop); + return RAW_RECV_CONSUMED; +} + + +static void +proxy_rtadvd_send_multicast(struct netif *proxy_netif) +{ + struct pbuf *ph, *pp; + err_t error; + + ph = pbuf_alloc(PBUF_IP, 0, PBUF_RAM); + if (ph == NULL) { + DPRINTF0(("%s: failed to allocate RA header pbuf", __func__)); + return; + } + + pp = pbuf_alloc(PBUF_RAW, unsolicited_ra_payload_length, PBUF_ROM); + if (pp == NULL) { + DPRINTF0(("%s: failed to allocate RA payload pbuf", __func__)); + pbuf_free(ph); + return; + } + pp->payload = unsolicited_ra_payload; + pbuf_chain(ph, pp); + + error = ip6_output_if(ph, + netif_ip6_addr(proxy_netif, 0), /* src: link-local */ + &allnodes_linklocal, /* dst */ + 255, /* hop limit */ + 0, /* traffic class */ + IP6_NEXTH_ICMP6, + proxy_netif); + if (error != ERR_OK) { + DPRINTF0(("%s: failed to send RA (err=%d)", __func__, error)); + } + + pbuf_free(pp); + pbuf_free(ph); +} + + +/* + * XXX: TODO: Only ra_header::router_lifetime (and hence + * ra_header::chksum) need to be changed, so we can precompute it once + * and then only update these two fields. 
+ */ +static void +proxy_rtadvd_fill_payload(struct netif *proxy_netif, int is_default) +{ + struct pbuf *p; + struct ra_header *ra_hdr; + struct lladdr_option *lladdr_opt; + struct prefix_option *pfx_opt; + unsigned int lladdr_optlen; + + LWIP_ASSERT("netif hwaddr too long", + proxy_netif->hwaddr_len <= NETIF_MAX_HWADDR_LEN); + + /* type + length + ll addr + round up to 8 octets */ + lladdr_optlen = (2 + proxy_netif->hwaddr_len + 7) & ~0x7; + + /* actual payload length */ + unsolicited_ra_payload_length = + sizeof(struct ra_header) + + lladdr_optlen + + sizeof(struct prefix_option) * 1; + + /* Set fields. */ + ra_hdr = (struct ra_header *)unsolicited_ra_payload; + lladdr_opt = (struct lladdr_option *)((u8_t *)ra_hdr + sizeof(struct ra_header)); + pfx_opt = (struct prefix_option *)((u8_t *)lladdr_opt + lladdr_optlen); + + memset(unsolicited_ra_payload, 0, sizeof(unsolicited_ra_payload)); + + ra_hdr->type = ICMP6_TYPE_RA; + +#if 0 + /* + * "M" flag. Tell guests to use stateful DHCP6. Disabled here + * since we don't provide stateful server. + */ + ra_hdr->flags |= ND6_RA_FLAG_MANAGED_ADDR_CONFIG; +#endif + /* + * XXX: TODO: Disable "O" flag for now to match disabled stateless + * server. We don't yet get IPv6 nameserver addresses from + * HostDnsService, so we have nothing to say, don't tell guests to + * come asking. + */ +#if 0 + /* + * "O" flag. Tell guests to use DHCP6 for DNS and the like. This + * is served by simple stateless server (RFC 3736). + * + * XXX: "STATEFUL" in the flag name was probably a bug in RFC2461. + * It's present in the text, but not in the router configuration + * variable name. It's dropped in the text in RFC4861. 
+ */ + ra_hdr->flags |= ND6_RA_FLAG_OTHER_STATEFUL_CONFIG; +#endif + + if (is_default) { + ra_hdr->router_lifetime = PP_HTONS(1200); /* seconds */ + } + else { + ra_hdr->router_lifetime = 0; + } + + lladdr_opt->type = ND6_OPTION_TYPE_SOURCE_LLADDR; + lladdr_opt->length = lladdr_optlen >> 3; /* in units of 8 octets */ + memcpy(lladdr_opt->addr, proxy_netif->hwaddr, proxy_netif->hwaddr_len); + + pfx_opt->type = ND6_OPTION_TYPE_PREFIX_INFO; + pfx_opt->length = 4; + pfx_opt->prefix_length = 64; + pfx_opt->flags = ND6_PREFIX_FLAG_ON_LINK + | ND6_PREFIX_FLAG_AUTONOMOUS; + pfx_opt->valid_lifetime = ~0U; /* infinite */ + pfx_opt->preferred_lifetime = ~0U; /* infinite */ + pfx_opt->prefix.addr[0] = netif_ip6_addr(proxy_netif, 1)->addr[0]; + pfx_opt->prefix.addr[1] = netif_ip6_addr(proxy_netif, 1)->addr[1]; + + + /* we need a temp pbuf to calculate the checksum */ + p = pbuf_alloc(PBUF_IP, unsolicited_ra_payload_length, PBUF_ROM); + if (p == NULL) { + DPRINTF0(("rtadvd: failed to allocate RA pbuf\n")); + return; + } + p->payload = unsolicited_ra_payload; + + ra_hdr->chksum = ip6_chksum_pseudo(p, IP6_NEXTH_ICMP6, p->len, + /* src addr: netif's link-local */ + netif_ip6_addr(proxy_netif, 0), + /* dst addr */ + &allnodes_linklocal); + pbuf_free(p); +} diff --git a/src/VBox/NetworkServices/NAT/proxy_tftpd.c b/src/VBox/NetworkServices/NAT/proxy_tftpd.c new file mode 100644 index 00000000..a29ac687 --- /dev/null +++ b/src/VBox/NetworkServices/NAT/proxy_tftpd.c @@ -0,0 +1,956 @@ +/* -*- indent-tabs-mode: nil; -*- */ +#define _USE_WINSTD_ERRNO +/* XXX: replace POSIX file operations with IPRT, to avoid hacks with errno renamings */ +#include "winutils.h" + +#include "proxy.h" +#include "tftp.h" + +#ifndef RT_OS_WINDOWS +#include <sys/types.h> +#include <sys/stat.h> +#include <errno.h> +#include <fcntl.h> +#include <stdarg.h> +#include <stdio.h> +#include <string.h> +#include <unistd.h> +#else +#include <sys/types.h> +#include <sys/stat.h> +#include <errno.h> +#include <fcntl.h> 
/*
 * State of one read transfer.  The transfers live in the fixed
 * tftpd.xfers[] array.
 */
struct xfer {
    struct udp_pcb *pcb;        /* per-transfer pcb; NULL marks a free slot */
    int fd;                     /* descriptor of the file being sent */
    unsigned int ack;           /* block number we expect to be acked;
                                 * only the low 16 bits go on the wire */
    struct pbuf *pbuf;          /* DATA packet, reused for every block */

    struct pbuf *oack;          /* OACK packet, NULL if none is pending */

    int rexmit;                 /* retransmissions of the current packet */

    ipX_addr_t peer_ip;         /* client address ... */
    u16_t peer_port;            /* ... and port the request came from */

    char *filename;             /* requested name, with '\\' mapped to '/' */
    int octet;                  /* 1 for "octet" mode */

    /* options */
    unsigned int blksize;       /* data bytes per block, 512 by default */
    int blksize_from_opt;       /* set if the client asked for blksize */

    unsigned int timeout;       /* retransmit timeout in seconds, default 1 */
    int timeout_from_opt;       /* set if timeout is to be acknowledged */

    off_t tsize;                /* file size, -1 until it is known */
    int tsize_from_opt;         /* set if the client asked for tsize */
};

/*
 * The TFTP server: the pcb requests arrive on, the directory files
 * are served from and the table of transfers.
 */
struct tftpd {
    struct udp_pcb *pcb;
    char *root;

#define TFTP_MAX_XFERS 3
    struct xfer xfers[TFTP_MAX_XFERS];
};

/*
 * Entry of the table of supported options.
 */
struct tftp_option {
    const char *name;           /* option name, compared ignoring case */
    /* handle the value from the request; returns 1 if the option
     * was accepted, 0 if it is to be ignored */
    int (*getopt)(struct xfer *, const char *);
    /* append the option to the OACK being built; < 0 on error */
    int (*ackopt)(struct xfer *, char **, size_t *);
};
**, size_t *); +static int tftp_ack_tsize(struct xfer *, char **, size_t *); + +static int tftp_add_oack(char **, size_t *, const char *, const char *, ...) __attribute__((format(printf, 4, 5))); + +static ssize_t tftp_strnlen(char *, size_t); + +static int tftp_internal_error(struct xfer *); +static int tftp_error(struct xfer *, u16_t, const char *, ...) __attribute__((format(printf, 3, 4))); +static void tftpd_error(ip_addr_t *, u16_t, u16_t, const char *, ...) __attribute__((format(printf, 4, 5))); +static struct pbuf *tftp_verror(u16_t, const char *, va_list); + + +/* const */ int report_transient_errors = 1; +static struct tftpd tftpd; + +static struct tftp_option tftp_options[] = { + { "blksize", tftp_opt_blksize, tftp_ack_blksize }, /* RFC 2348 */ + { "timeout", tftp_opt_timeout, tftp_ack_timeout }, /* RFC 2349 */ + { "tsize", tftp_opt_tsize, tftp_ack_tsize }, /* RFC 2349 */ + { NULL, NULL, NULL } +}; + + +err_t +tftpd_init(struct netif *proxy_netif, const char *tftproot) +{ + size_t len; + err_t error; + + tftpd.root = strdup(tftproot); + if (tftpd.root == NULL) { + DPRINTF0(("%s: failed to allocate tftpd.root\n", __func__)); + return ERR_MEM; + } + + len = strlen(tftproot); + if (tftpd.root[len - 1] == '/') { + tftpd.root[len - 1] = '\0'; + } + + tftpd.pcb = udp_new(); + if (tftpd.pcb == NULL) { + DPRINTF0(("%s: failed to allocate PCB\n", __func__)); + return ERR_MEM; + } + + udp_recv(tftpd.pcb, tftpd_recv, NULL); + + error = udp_bind(tftpd.pcb, &proxy_netif->ip_addr, TFTP_SERVER_PORT); + if (error != ERR_OK) { + DPRINTF0(("%s: failed to bind PCB\n", __func__)); + return error; + } + + return ERR_OK; +} + + +static void +tftpd_recv(void *arg, struct udp_pcb *pcb, struct pbuf *p, + ip_addr_t *addr, u16_t port) +{ + u16_t op; + + LWIP_ASSERT1(pcb == tftpd.pcb); + + LWIP_UNUSED_ARG(pcb); /* only in assert */ + LWIP_UNUSED_ARG(arg); + + if (pbuf_clen(p) > 1) { /* this code assumes contiguous aligned payload */ + pbuf_free(p); + return; + } + + op = 
ntohs(*(u16_t *)p->payload); + switch (op) { + case TFTP_RRQ: + tftpd_rrq(p, addr, port); + break; + + case TFTP_WRQ: + tftpd_error(addr, port, TFTP_EACCESS, "Permission denied"); + break; + + default: + tftpd_error(addr, port, TFTP_ENOSYS, "Bad opcode %d", op); + break; + } + + pbuf_free(p); +} + + +/** + * Parse Read Request packet and start new transfer. + */ +static void +tftpd_rrq(struct pbuf *p, ip_addr_t *addr, u16_t port) +{ + struct xfer *xfer; + char *s; + size_t len; + int has_options; + int status; + + xfer = tftp_xfer_alloc(addr, port); + if (xfer == NULL) { + return; + } + + /* skip opcode */ + s = (char *)p->payload + sizeof(u16_t); + len = p->len - sizeof(u16_t); + + + /* + * Parse RRQ: + * filename, mode, [opt1, value1, [...] ] + */ + status = tftp_parse_filename(xfer, &s, &len); + if (status < 0) { + goto terminate; + } + + status = tftp_parse_mode(xfer, &s, &len); + if (status < 0) { + goto terminate; + } + + has_options = 0; + while (len > 0) { + status = tftp_parse_option(xfer, &s, &len); + if (status < 0) { + goto terminate; + } + has_options += status; + } + + + /* + * Create OACK packet if necessary. + */ + if (has_options) { + xfer->oack = pbuf_alloc(PBUF_RAW, 128, PBUF_RAM); + if (xfer->oack != NULL) { + struct tftp_option *o; + + ((u16_t *)xfer->oack->payload)[0] = PP_HTONS(TFTP_OACK); + + s = (char *)xfer->oack->payload + sizeof(u16_t); + len = xfer->oack->len - sizeof(u16_t); + + for (o = &tftp_options[0]; o->name != NULL; ++o) { + status = (*o->ackopt)(xfer, &s, &len); + if (status < 0) { + pbuf_free(xfer->oack); + xfer->oack = NULL; + break; + } + } + + if (xfer->oack != NULL) { + pbuf_realloc(xfer->oack, xfer->oack->len - len); + } + } + } + + + /* + * Create static pbuf that will be used for all data packets. 
+ */ + xfer->pbuf = pbuf_alloc(PBUF_RAW, xfer->blksize + 4, PBUF_RAM); + if (xfer->pbuf == NULL) { + tftp_internal_error(xfer); + goto terminate; + } + ((u16_t *)xfer->pbuf->payload)[0] = PP_HTONS(TFTP_DATA); + + + /* + * Finally, create PCB. Before this point any error was reported + * from the server port (see tftp_error() for the reason). + */ + status = tftp_xfer_create_pcb(xfer); + if (status < 0) { + goto terminate; + } + + if (xfer->oack) { + tftp_send(xfer); + } + else { + /* trigger send of the first data packet */ + tftp_recv_ack(xfer, 0); + } + + return; + + terminate: + DPRINTF(("%s: terminated", __func__)); + tftp_xfer_free(xfer); +} + + +static void +tftp_xfer_recv(void *arg, struct udp_pcb *pcb, struct pbuf *p, + ip_addr_t *addr, u16_t port) +{ + struct xfer *xfer = (struct xfer *)arg; + u16_t op; + + LWIP_UNUSED_ARG(pcb); /* assert only */ + LWIP_UNUSED_ARG(addr); + LWIP_UNUSED_ARG(port); + + LWIP_ASSERT1(xfer->pcb == pcb); + + if (p->len < 2) { + tftp_error(xfer, TFTP_ENOSYS, "Short packet"); + tftp_xfer_free(xfer); + pbuf_free(p); + return; + } + + op = ntohs(*(u16_t *)p->payload); + if (op == TFTP_ACK) { + u16_t ack; + + if (p->len < 4) { + tftp_error(xfer, TFTP_ENOSYS, "Short packet"); + tftp_xfer_free(xfer); + pbuf_free(p); + return; + } + + ack = ntohs(((u16_t *)p->payload)[1]); + tftp_recv_ack(xfer, ack); + } + else if (op == TFTP_ERROR) { + tftp_xfer_free(xfer); + } + else { + tftp_error(xfer, TFTP_ENOSYS, "Unexpected opcode %d", op); + tftp_xfer_free(xfer); + } + + pbuf_free(p); +} + + +static void +tftp_recv_ack(struct xfer *xfer, u16_t ack) +{ + if (ack != (u16_t)xfer->ack) { + DPRINTF2(("%s: expect %u (%u), got %u\n", + __func__, (u16_t)xfer->ack, xfer->ack, ack)); + return; + } + + sys_untimeout(tftp_timeout, xfer); + xfer->rexmit = 0; + + if (xfer->pbuf->len < xfer->blksize) { + DPRINTF(("%s: got final ack %u (%u)\n", + __func__, (u16_t)xfer->ack, xfer->ack)); + tftp_xfer_free(xfer); + return; + } + + if (xfer->oack != NULL) { + 
pbuf_free(xfer->oack); + xfer->oack = NULL; + } + + ++xfer->ack; + tftp_fillbuf(xfer); + tftp_send(xfer); +} + + +static void +tftp_send(struct xfer *xfer) +{ + struct pbuf *pbuf; + + pbuf = xfer->oack ? xfer->oack : xfer->pbuf; + udp_send(xfer->pcb, pbuf); + sys_timeout(xfer->timeout * 1000, tftp_timeout, xfer); +} + + +static void +tftp_timeout(void *arg) +{ + struct xfer *xfer = (struct xfer *)arg; + int maxrexmit; + + maxrexmit = xfer->timeout < 60 ? 5 : 3; + if (++xfer->rexmit < maxrexmit) { + tftp_send(xfer); + } + else { + tftp_xfer_free(xfer); + } +} + + +static void +tftp_fillbuf(struct xfer *xfer) +{ + ssize_t nread; + + DPRINTF2(("%s: reading block %u\n", __func__, xfer->ack)); + + ((u16_t *)xfer->pbuf->payload)[1] = htons(xfer->ack); + nread = read(xfer->fd, (char *)xfer->pbuf->payload + 4, xfer->blksize); + + if (nread < 0) { + tftp_error(xfer, TFTP_EUNDEF, "Read failed"); + return; + } + + pbuf_realloc(xfer->pbuf, nread + 4); +} + + +/** + * Find a free transfer slot (without a pcb). Record peer's IP + * address and port, but don't allocate a pcb yet. + * + * We delay creation of the pcb in response to the original request + * until the request is verified and accepted. This makes using + * tcpdump(8) easier, since tcpdump does not track TFTP transfers, so + * an error reply from a new pcb is not recognized as such and is not + * decoded as TFTP (see tftp_error()). + * + * If the request is rejected, the pcb remains NULL and the transfer + * slot remains unallocated. Since all TFTP processing happens on the + * lwIP thread, there's no concurrent processing, so we don't need to + * "lock" the transfer slot until the pcb is allocated. 
+ */ +static struct xfer * +tftp_xfer_alloc(ip_addr_t *addr, u16_t port) +{ + struct xfer *xfer; + int i; + + /* Find free xfer slot */ + xfer = NULL; + for (i = 0; i < TFTP_MAX_XFERS; ++i) { + if (tftpd.xfers[i].pcb == NULL) { + xfer = &tftpd.xfers[i]; + break; + } + } + + if (xfer == NULL) { + if (report_transient_errors) { + tftpd_error(addr, port, TFTP_EUNDEF, + "Maximum number of simultaneous connections exceeded"); + } + return NULL; + } + + ipX_addr_copy(0, xfer->peer_ip, *ip_2_ipX(addr)); + xfer->peer_port = port; + + xfer->ack = 0; + + xfer->pbuf = NULL; + xfer->oack = NULL; + xfer->rexmit = 0; + + xfer->blksize = 512; + xfer->blksize_from_opt = 0; + + xfer->timeout = 1; + xfer->timeout_from_opt = 0; + + xfer->tsize = -1; + xfer->tsize_from_opt = 0; + + return xfer; +} + + +static int +tftp_xfer_create_pcb(struct xfer *xfer) +{ + struct udp_pcb *pcb; + err_t error; + + pcb = udp_new(); + + /* Bind */ + if (pcb != NULL) { + error = udp_bind(pcb, ipX_2_ip(&tftpd.pcb->local_ip), 0); + if (error != ERR_OK) { + udp_remove(pcb); + pcb = NULL; + } + } + + /* Connect */ + if (pcb != NULL) { + error = udp_connect(pcb, ipX_2_ip(&xfer->peer_ip), xfer->peer_port); + if (error != ERR_OK) { + udp_remove(pcb); + pcb = NULL; + } + } + + if (pcb == NULL) { + if (report_transient_errors) { + tftp_error(xfer, TFTP_EUNDEF, "Failed to create connection"); + } + return -1; + } + + xfer->pcb = pcb; + udp_recv(xfer->pcb, tftp_xfer_recv, xfer); + + return 0; +} + + +static void +tftp_xfer_free(struct xfer *xfer) +{ + sys_untimeout(tftp_timeout, xfer); + + if (xfer->pcb != NULL) { + udp_remove(xfer->pcb); + xfer->pcb = NULL; + } + + if (xfer->fd > 0) { + close(xfer->fd); + xfer->fd = -1; + } + + if (xfer->oack != NULL) { + pbuf_free(xfer->oack); + xfer->oack = NULL; + } + + if (xfer->pbuf != NULL) { + pbuf_free(xfer->pbuf); + xfer->pbuf = NULL; + } + + if (xfer->filename != NULL) { + free(xfer->filename); + xfer->filename = NULL; + } +} + + +static int +tftp_parse_filename(struct 
xfer *xfer, char **ps, size_t *plen) +{ + const char *filename; + struct stat st; + char *pathname; + char *s; + size_t len; + int status; + + filename = tftp_getstr(xfer, "filename", ps, plen); + if (filename == NULL) { + return -1; + } + + DPRINTF(("%s: requested file name: %s\n", __func__, filename)); + xfer->filename = strdup(filename); + if (xfer->filename == NULL) { + return tftp_internal_error(xfer); + } + + /* replace backslashes with forward slashes */ + s = xfer->filename; + while ((s = strchr(s, '\\')) != NULL) { + *s++ = '/'; + } + + /* deny attempts to break out of tftp dir */ + if (strncmp(xfer->filename, "../", 3) == 0 + || strstr(xfer->filename, "/../") != NULL) + { + return tftp_error(xfer, TFTP_ENOENT, "Permission denied"); + } + + len = strlen(tftpd.root) + 1 /*slash*/ + strlen(xfer->filename) + 1 /*nul*/; + pathname = (char *)malloc(len); + if (pathname == NULL) { + return tftp_internal_error(xfer); + } + + status = RTStrPrintf(pathname, len, "%s/%s", tftpd.root, xfer->filename); + if (status < 0) { + return tftp_internal_error(xfer); + } + + DPRINTF(("%s: full pathname: %s\n", __func__, pathname)); + xfer->fd = open(pathname, O_RDONLY); + if (xfer->fd < 0) { + if (errno == EPERM) { + return tftp_error(xfer, TFTP_ENOENT, "Permission denied"); + } + else { + return tftp_error(xfer, TFTP_ENOENT, "File not found"); + } + } + + status = fstat(xfer->fd, &st); + if (status < 0) { + return tftp_internal_error(xfer); + } + + if (!S_ISREG(st.st_mode)) { + return tftp_error(xfer, TFTP_ENOENT, "File not found"); + } + + xfer->tsize = st.st_size; + return 0; +} + + +static int +tftp_parse_mode(struct xfer *xfer, char **ps, size_t *plen) +{ + const char *modename; + + modename = tftp_getstr(xfer, "mode", ps, plen); + if (modename == NULL) { + return -1; + } + + if (RTStrICmp(modename, "octet") == 0) { + xfer->octet = 1; + } + else if (RTStrICmp(modename, "netascii") == 0) { + xfer->octet = 0; + /* XXX: not (yet?) 
*/ + return tftp_error(xfer, TFTP_ENOSYS, "Mode \"netascii\" not supported"); + } + else if (RTStrICmp(modename, "mail") == 0) { + return tftp_error(xfer, TFTP_ENOSYS, "Mode \"mail\" not supported"); + } + else { + return tftp_error(xfer, TFTP_ENOSYS, "Unknown mode \"%s\"", modename); + } + + return 0; +} + + +static int +tftp_parse_option(struct xfer *xfer, char **ps, size_t *plen) +{ + const char *opt; + const char *val; + struct tftp_option *o; + + opt = tftp_getstr(xfer, "option name", ps, plen); + if (opt == NULL) { + return -1; + } + + if (*plen == 0) { + return tftp_error(xfer, TFTP_EUNDEF, "Missing option value"); + } + + val = tftp_getstr(xfer, "option value", ps, plen); + if (val == NULL) { + return -1; + } + + /* handle option if known, ignore otherwise */ + for (o = &tftp_options[0]; o->name != NULL; ++o) { + if (RTStrICmp(o->name, opt) == 0) { + return (*o->getopt)(xfer, val); + } + } + + return 0; /* unknown option */ +} + + +static int +tftp_opt_blksize(struct xfer *xfer, const char *optval) +{ + char *end; + long blksize; + + errno = 0; + blksize = strtol(optval, &end, 10); + if (errno != 0 || *end != '\0') { + return 0; + } + + if (blksize < 8) { + return 0; + } + + if (blksize > 1428) { /* exceeds ethernet mtu */ + blksize = 1428; + } + + xfer->blksize = blksize; + xfer->blksize_from_opt = 1; + return 1; +} + + +static int +tftp_opt_timeout(struct xfer *xfer, const char *optval) +{ + LWIP_UNUSED_ARG(xfer); + LWIP_UNUSED_ARG(optval); + return 0; +} + + +static int +tftp_opt_tsize(struct xfer *xfer, const char *optval) +{ + LWIP_UNUSED_ARG(optval); /* must be "0", but we don't check it */ + + if (xfer->tsize < 0) { + return 0; + } + + xfer->tsize_from_opt = 1; + return 1; +} + + +static char * +tftp_getstr(struct xfer *xfer, const char *msg, char **ps, size_t *plen) +{ + char *s; + ssize_t slen; + + s = *ps; + slen = tftp_strnlen(s, *plen); + if (slen < 0) { + tftp_error(xfer, TFTP_EUNDEF, "Unterminated %s", msg); + return NULL; + } + + *ps += slen 
+ 1; + *plen -= slen + 1; + + return s; +} + + +static int +tftp_ack_blksize(struct xfer *xfer, char **ps, size_t *plen) +{ + if (!xfer->blksize_from_opt) { + return 0; + } + + return tftp_add_oack(ps, plen, "blksize", "%u", xfer->blksize); +} + + +static int +tftp_ack_timeout(struct xfer *xfer, char **ps, size_t *plen) +{ + if (!xfer->timeout_from_opt) { + return 0; + } + + return tftp_add_oack(ps, plen, "timeout", "%u", xfer->timeout); +} + + +static int +tftp_ack_tsize(struct xfer *xfer, char **ps, size_t *plen) +{ + if (!xfer->tsize_from_opt) { + return 0; + } + + LWIP_ASSERT1(xfer->tsize >= 0); + return tftp_add_oack(ps, plen, "tsize", + /* XXX: FIXME: want 64 bit */ + "%lu", (unsigned long)xfer->tsize); +} + + +static int +tftp_add_oack(char **ps, size_t *plen, + const char *optname, const char *fmt, ...) +{ + va_list ap; + int sz; + + sz = RTStrPrintf(*ps, *plen, "%s", optname); + if (sz < 0 || (size_t)sz >= *plen) { + return -1; + } + + ++sz; /* for nul byte */ + *ps += sz; + *plen -= sz; + + va_start(ap, fmt); + sz = vsnprintf(*ps, *plen, fmt, ap); + va_end(ap); + if (sz < 0 || (size_t)sz >= *plen) { + return -1; + } + + ++sz; /* for nul byte */ + *ps += sz; + *plen -= sz; + + return 0; +} + + +static ssize_t +tftp_strnlen(char *buf, size_t bufsize) +{ + void *end; + + end = memchr(buf, '\0', bufsize); + if (end == NULL) { + return -1; + } + + return (char *)end - buf; +} + + +static int +tftp_internal_error(struct xfer *xfer) +{ + if (report_transient_errors) { + tftp_error(xfer, TFTP_EUNDEF, "Internal error"); + } + return -1; +} + + +/** + * Send an error packet to the peer. + * + * PCB may not be created yet in which case send the error packet from + * the TFTP server port (*). + * + * (*) We delay creation of the PCB in response to the original + * request until the request is verified and accepted. 
This makes + * using tcpdump(8) easier, since tcpdump does not track TFTP + * transfers, so an error reply from a new PCB is not recognized as + * such and is not decoded as TFTP. + * + * Always returns -1 for callers to reuse. + */ +static int +tftp_error(struct xfer *xfer, u16_t error, const char *fmt, ...) +{ + va_list ap; + struct pbuf *q; + + LWIP_ASSERT1(xfer != NULL); + + va_start(ap, fmt); + q = tftp_verror(error, fmt, ap); + va_end(ap); + + if (q == NULL) { + return -1; + } + + if (xfer->pcb != NULL) { + udp_send(xfer->pcb, q); + } + else { + udp_sendto(tftpd.pcb, q, ipX_2_ip(&xfer->peer_ip), xfer->peer_port); + } + + pbuf_free(q); + return -1; +} + + +/** + * Send an error packet from TFTP server port to the specified peer. + */ +static void +tftpd_error(ip_addr_t *addr, u16_t port, u16_t error, const char *fmt, ...) +{ + va_list ap; + struct pbuf *q; + + va_start(ap, fmt); + q = tftp_verror(error, fmt, ap); + va_end(ap); + + if (q != NULL) { + udp_sendto(tftpd.pcb, q, addr, port); + pbuf_free(q); + } +} + + +/** + * Create ERROR pbuf with formatted error message. 
+ */ +static struct pbuf * +tftp_verror(u16_t error, const char *fmt, va_list ap) +{ + struct tftp_error { + u16_t opcode; /* TFTP_ERROR */ + u16_t errcode; + char errmsg[512]; + }; + + struct pbuf *p; + struct tftp_error *errpkt; + int msgsz; + + p = pbuf_alloc(PBUF_TRANSPORT, sizeof(*errpkt), PBUF_RAM); + if (p == NULL) { + return NULL; + } + + errpkt = (struct tftp_error *)p->payload; + errpkt->opcode = PP_HTONS(TFTP_ERROR); + errpkt->errcode = htons(error); + + msgsz = vsnprintf(errpkt->errmsg, sizeof(errpkt->errmsg), fmt, ap); + if (msgsz < 0) { + errpkt->errmsg[0] = '\0'; + msgsz = 1; + } + else if ((size_t)msgsz < sizeof(errpkt->errmsg)) { + ++msgsz; /* for nul byte */ + } + else { + msgsz = sizeof(errpkt->errmsg); /* truncated, includes nul byte */ + } + + pbuf_realloc(p, sizeof(*errpkt) - sizeof(errpkt->errmsg) + msgsz); + return p; +} diff --git a/src/VBox/NetworkServices/NAT/pxdns.c b/src/VBox/NetworkServices/NAT/pxdns.c new file mode 100644 index 00000000..2f3b19f0 --- /dev/null +++ b/src/VBox/NetworkServices/NAT/pxdns.c @@ -0,0 +1,859 @@ +/* -*- indent-tabs-mode: nil; -*- */ + +/* + * Copyright (C) 2009-2013 Oracle Corporation + * + * This file is part of VirtualBox Open Source Edition (OSE), as + * available from http://www.virtualbox.org. This file is free software; + * you can redistribute it and/or modify it under the terms of the GNU + * General Public License (GPL) as published by the Free Software + * Foundation, in version 2 as it comes in the "COPYING" file of the + * VirtualBox OSE distribution. VirtualBox OSE is distributed in the + * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind. 
+ */ + +/* + * Copyright (c) 2003,2004,2005 Armin Wolfermann + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ +#include "winutils.h" + +#include "proxy.h" +#include "proxy_pollmgr.h" + +#include "lwip/sys.h" +#include "lwip/tcpip.h" +#include "lwip/udp.h" + +#ifndef RT_OS_WINDOWS +#include <sys/poll.h> +#include <sys/socket.h> +#include <netinet/in.h> +#include <netdb.h> +#else +#include "winpoll.h" +#endif + +#include <stdio.h> +#include <string.h> + + +union sockaddr_inet { + struct sockaddr sa; + struct sockaddr_in sin; + struct sockaddr_in6 sin6; +}; + + +struct request; + + +/** + * DNS Proxy + */ +struct pxdns { + SOCKET sock4; + SOCKET sock6; + + struct pollmgr_handler pmhdl4; + struct pollmgr_handler pmhdl6; + + struct udp_pcb *pcb4; + struct udp_pcb *pcb6; + + size_t generation; + size_t nresolvers; + union sockaddr_inet *resolvers; + + u16_t id; + + sys_mutex_t lock; + + size_t active_queries; + size_t expired_queries; + size_t late_answers; + size_t hash_collisions; + +#define TIMEOUT 5 + size_t timeout_slot; + u32_t timeout_mask; + struct request *timeout_list[TIMEOUT]; + +#define HASHSIZE 10 +#define HASH(id) ((id) & ((1 << HASHSIZE) - 1)) + struct request *request_hash[1 << HASHSIZE]; +} g_pxdns; + + +struct request { + /** + * Request ID that we use in relayed request. + */ + u16_t id; + + /** + * pxdns::generation used for this request + */ + size_t generation; + + /** + * Current index into pxdns::resolvers + */ + size_t residx; + + /** + * PCB from which we have received this request. lwIP doesn't + * support listening for both IPv4 and IPv6 on the same pcb, so we + * use two and need to keep track. + */ + struct udp_pcb *pcb; + + /** + * Client this request is from and its original request ID. 
+ */ + ipX_addr_t client_addr; + u16_t client_port; + u16_t client_id; + + /** + * Chaining for pxdns::request_hash + */ + struct request **pprev_hash; + struct request *next_hash; + + /** + * Chaining for pxdns::timeout_list + */ + struct request **pprev_timeout; + struct request *next_timeout; + + /** + * Slot in pxdns::timeout_list + */ + size_t timeout_slot; + + /** + * Pbuf with reply received on pollmgr thread. + */ + struct pbuf *reply; + + /** + * Preallocated lwIP message to send reply from the lwIP thread. + */ + struct tcpip_msg msg_reply; + + /** + * Client request. ID is replaced with ours, original saved in + * client_id. Use a copy since we might need to resend and we + * don't want to hold onto pbuf of the request. + */ + size_t size; + u8_t data[1]; +}; + + +static void pxdns_create_resolver_sockaddrs(struct pxdns *pxdns, + const char **nameservers); + +static void pxdns_recv4(void *arg, struct udp_pcb *pcb, struct pbuf *p, + ip_addr_t *addr, u16_t port); +static void pxdns_recv6(void *arg, struct udp_pcb *pcb, struct pbuf *p, + ip6_addr_t *addr, u16_t port); +static void pxdns_query(struct pxdns *pxdns, struct udp_pcb *pcb, struct pbuf *p, + ipX_addr_t *addr, u16_t port); +static void pxdns_timer(void *arg); +static int pxdns_rexmit(struct pxdns *pxdns, struct request *req); +static int pxdns_forward_outbound(struct pxdns *pxdns, struct request *req); + +static int pxdns_pmgr_pump(struct pollmgr_handler *handler, SOCKET fd, int revents); +static void pxdns_pcb_reply(void *ctx); + +static void pxdns_request_register(struct pxdns *pxdns, struct request *req); +static void pxdns_request_deregister(struct pxdns *pxdns, struct request *req); +static struct request *pxdns_request_find(struct pxdns *pxdns, u16_t id); + +static void pxdns_hash_add(struct pxdns *pxdns, struct request *req); +static void pxdns_hash_del(struct pxdns *pxdns, struct request *req); +static void pxdns_timeout_add(struct pxdns *pxdns, struct request *req); +static void 
pxdns_timeout_del(struct pxdns *pxdns, struct request *req); + +static void pxdns_request_free(struct request *req); + + +err_t +pxdns_init(struct netif *proxy_netif) +{ + struct pxdns *pxdns = &g_pxdns; + err_t error; + + LWIP_UNUSED_ARG(proxy_netif); + + pxdns->pmhdl4.callback = pxdns_pmgr_pump; + pxdns->pmhdl4.data = (void *)pxdns; + pxdns->pmhdl4.slot = -1; + + pxdns->pmhdl6.callback = pxdns_pmgr_pump; + pxdns->pmhdl6.data = (void *)pxdns; + pxdns->pmhdl6.slot = -1; + + pxdns->pcb4 = udp_new(); + if (pxdns->pcb4 == NULL) { + error = ERR_MEM; + goto err_cleanup_pcb; + } + + pxdns->pcb6 = udp_new_ip6(); + if (pxdns->pcb6 == NULL) { + error = ERR_MEM; + goto err_cleanup_pcb; + } + + error = udp_bind(pxdns->pcb4, IP_ADDR_ANY, 53); + if (error != ERR_OK) { + goto err_cleanup_pcb; + } + + error = udp_bind_ip6(pxdns->pcb6, IP6_ADDR_ANY, 53); + if (error != ERR_OK) { + goto err_cleanup_pcb; + } + + udp_recv(pxdns->pcb4, pxdns_recv4, pxdns); + udp_recv_ip6(pxdns->pcb6, pxdns_recv6, pxdns); + + pxdns->sock4 = socket(AF_INET, SOCK_DGRAM, 0); + if (pxdns->sock4 == INVALID_SOCKET) { + goto err_cleanup_pcb; + } + + pxdns->sock6 = socket(AF_INET6, SOCK_DGRAM, 0); + if (pxdns->sock6 == INVALID_SOCKET) { + /* it's ok if the host doesn't support IPv6 */ + /* XXX: TODO: log */ + } + + pxdns->generation = 0; + pxdns->nresolvers = 0; + pxdns->resolvers = NULL; + pxdns_create_resolver_sockaddrs(pxdns, g_proxy_options->nameservers); + + sys_mutex_new(&pxdns->lock); + + pxdns->timeout_slot = 0; + pxdns->timeout_mask = 0; + + /* NB: assumes pollmgr thread is not running yet */ + pollmgr_add(&pxdns->pmhdl4, pxdns->sock4, POLLIN); + if (pxdns->sock6 != INVALID_SOCKET) { + pollmgr_add(&pxdns->pmhdl6, pxdns->sock6, POLLIN); + } + + return ERR_OK; + + err_cleanup_pcb: + if (pxdns->pcb4 != NULL) { + udp_remove(pxdns->pcb4); + pxdns->pcb4 = NULL; + } + if (pxdns->pcb6 != NULL) { + udp_remove(pxdns->pcb6); + pxdns->pcb4 = NULL; + } + + return error; +} + + +/** + * lwIP thread callback to set 
the new list of nameservers.
 */
void
pxdns_set_nameservers(void *arg)
{
    const char **nameservers = (const char **)arg;

    /* release the previously installed list before storing the new one */
    if (g_proxy_options->nameservers != NULL) {
        RTMemFree(g_proxy_options->nameservers);
    }
    g_proxy_options->nameservers = nameservers;

    pxdns_create_resolver_sockaddrs(&g_pxdns, nameservers);
}


/**
 * Use this list of nameservers to resolve guest requests.
 *
 * Runs on lwIP thread, so no new queries or retransmits compete with
 * it for the use of the existing list of resolvers (to be replaced).
 *
 * Each name is parsed as a numeric address (no DNS lookups); entries
 * that fail to parse, are of an unsupported family, or are IPv6 when
 * the host has no IPv6 socket are skipped.  The generation counter is
 * always bumped, so requests created against the old list are not
 * retransmitted against the new one (see pxdns_rexmit()).
 *
 * @param pxdns         DNS proxy state to update.
 * @param nameservers   NULL-terminated array of numeric addresses;
 *                      may be NULL, which yields an empty list.
 */
static void
pxdns_create_resolver_sockaddrs(struct pxdns *pxdns, const char **nameservers)
{
    struct addrinfo hints;
    union sockaddr_inet *resolvers;
    size_t nnames, nresolvers;
    const char **p;
    int status;

    resolvers = NULL;
    nresolvers = 0;

    if (nameservers == NULL) {
        goto update_resolvers;
    }

    nnames = 0;
    for (p = nameservers; *p != NULL; ++p) {
        ++nnames;
    }

    if (nnames == 0) {
        goto update_resolvers;
    }

    /* upper bound: one slot per name, some may remain unused */
    resolvers = (union sockaddr_inet *)calloc(nnames, sizeof(resolvers[0]));
    if (resolvers == NULL) {
        nresolvers = 0;
        goto update_resolvers;
    }

    memset(&hints, 0, sizeof(hints));
    hints.ai_family = AF_UNSPEC;
    hints.ai_socktype = SOCK_DGRAM;
    hints.ai_flags = AI_NUMERICHOST | AI_NUMERICSERV;

    for (p = nameservers; *p != NULL; ++p) {
        const char *name = *p;
        struct addrinfo *ai;
        status = getaddrinfo(name, /* "domain" */ "53", &hints, &ai);
        if (status != 0) {
            /* XXX: log failed resolution */
            continue;
        }

        if (ai->ai_family != AF_INET && ai->ai_family != AF_INET6) {
            /* XXX: log unsupported address family */
            freeaddrinfo(ai);
            continue;
        }

        if (ai->ai_addrlen > sizeof(resolvers[nresolvers])) {
            /* XXX: log */
            freeaddrinfo(ai);
            continue;
        }

        if (ai->ai_family == AF_INET6 && pxdns->sock6 == INVALID_SOCKET) {
            /* no IPv6 support on the host, can't use this resolver */
            freeaddrinfo(ai);
            continue;
        }

        memcpy(&resolvers[nresolvers], ai->ai_addr, ai->ai_addrlen);
        freeaddrinfo(ai);
        ++nresolvers;
    }

    if (nresolvers == 0) {
        if (resolvers != NULL) {
            free(resolvers);
        }
        resolvers = NULL;
    }

  update_resolvers:
    ++pxdns->generation;
    if (pxdns->resolvers != NULL) {
        free(pxdns->resolvers);
    }
    pxdns->resolvers = resolvers;
    pxdns->nresolvers = nresolvers;
}


/**
 * Release a request and its reply pbuf, if any.  The request must
 * already be unlinked from both the id hash and the timeout list.
 */
static void
pxdns_request_free(struct request *req)
{
    LWIP_ASSERT1(req->pprev_hash == NULL);
    LWIP_ASSERT1(req->pprev_timeout == NULL);

    if (req->reply != NULL) {
        pbuf_free(req->reply);
    }
    free(req);
}


/**
 * Link the request at the head of its pxdns::request_hash chain.
 * Callers in this file hold pxdns::lock.
 */
static void
pxdns_hash_add(struct pxdns *pxdns, struct request *req)
{
    struct request **chain;

    LWIP_ASSERT1(req->pprev_hash == NULL);
    chain = &pxdns->request_hash[HASH(req->id)];
    if ((req->next_hash = *chain) != NULL) {
        (*chain)->pprev_hash = &req->next_hash;
        ++pxdns->hash_collisions;
    }
    *chain = req;
    req->pprev_hash = chain;
}


/**
 * Link the request at the head of the current timeout slot and arm
 * the one second timer if no slot was occupied before.
 */
static void
pxdns_timeout_add(struct pxdns *pxdns, struct request *req)
{
    struct request **chain;
    u32_t omask;

    LWIP_ASSERT1(req->pprev_timeout == NULL);

    req->timeout_slot = pxdns->timeout_slot;
    chain = &pxdns->timeout_list[req->timeout_slot];
    if ((req->next_timeout = *chain) != NULL) {
        (*chain)->pprev_timeout = &req->next_timeout;
    }
    *chain = req;
    req->pprev_timeout = chain;

    omask = pxdns->timeout_mask;
    pxdns->timeout_mask |= 1U << req->timeout_slot;
    if (omask == 0) {
        sys_timeout(1 * 1000, pxdns_timer, pxdns);
    }
}


/**
 * Unlink the request from its pxdns::request_hash chain.
 *
 * This is a pure list operation: pxdns::active_queries is accounted
 * for by the callers, exactly once per removed request, mirroring the
 * single increment in pxdns_request_register().
 */
static void
pxdns_hash_del(struct pxdns *pxdns, struct request *req)
{
    LWIP_UNUSED_ARG(pxdns);
    LWIP_ASSERT1(req->pprev_hash != NULL);

    if (req->next_hash != NULL) {
        req->next_hash->pprev_hash = req->pprev_hash;
    }
    *req->pprev_hash = req->next_hash;
    req->pprev_hash = NULL;
    req->next_hash = NULL;
}


/**
 * Unlink the request from its timeout slot and clear the slot's bit
 * in pxdns::timeout_mask if the slot became empty.
 */
static void
pxdns_timeout_del(struct pxdns *pxdns, struct request *req)
{
    LWIP_ASSERT1(req->pprev_timeout != NULL);
    LWIP_ASSERT1(req->timeout_slot < TIMEOUT);

    if (req->next_timeout != NULL) {
        req->next_timeout->pprev_timeout = req->pprev_timeout;
    }
    *req->pprev_timeout = req->next_timeout;
    req->pprev_timeout = NULL;
    req->next_timeout = NULL;

    if (pxdns->timeout_list[req->timeout_slot] == NULL) {
        pxdns->timeout_mask &= ~(1U << req->timeout_slot);
        /* may be on pollmgr thread so no sys_untimeout */
    }
}



/**
 * Do bookkeeping on new request.  Called from pxdns_query().
 */
static void
pxdns_request_register(struct pxdns *pxdns, struct request *req)
{
    sys_mutex_lock(&pxdns->lock);

    pxdns_hash_add(pxdns, req);
    pxdns_timeout_add(pxdns, req);
    ++pxdns->active_queries;

    sys_mutex_unlock(&pxdns->lock);
}


/**
 * Undo pxdns_request_register().  Called from pxdns_query() when the
 * request could not be sent to any resolver.
 */
static void
pxdns_request_deregister(struct pxdns *pxdns, struct request *req)
{
    sys_mutex_lock(&pxdns->lock);

    pxdns_hash_del(pxdns, req);
    pxdns_timeout_del(pxdns, req);
    --pxdns->active_queries;

    sys_mutex_unlock(&pxdns->lock);
}


/**
 * Find request by the id we used when relaying it and remove it from
 * id hash and timeout list.  Called from pxdns_pmgr_pump() when reply
 * comes.
 *
 * @returns the unlinked request, or NULL if no request has that id.
 */
static struct request *
pxdns_request_find(struct pxdns *pxdns, u16_t id)
{
    struct request *req = NULL;

    sys_mutex_lock(&pxdns->lock);

    /* find request in the id->req hash */
    for (req = pxdns->request_hash[HASH(id)]; req != NULL; req = req->next_hash) {
        if (req->id == id) {
            break;
        }
    }

    if (req != NULL) {
        pxdns_hash_del(pxdns, req);
        pxdns_timeout_del(pxdns, req);
        --pxdns->active_queries;
    }

    sys_mutex_unlock(&pxdns->lock);
    return req;
}


/**
 * Retransmit or g/c expired requests and move timeout slot forward.
 *
 * Re-arms itself for another second for as long as any timeout slot
 * is occupied.
 */
static void
pxdns_timer(void *arg)
{
    struct pxdns *pxdns = (struct pxdns *)arg;
    struct request **chain, *req;
    u32_t mask;

    sys_mutex_lock(&pxdns->lock);

    /*
     * Move timeout slot first.  New slot points to the list of
     * expired requests.  If any expired request is retransmitted, we
     * keep it on the list (that is now current), effectively
     * resetting the timeout.
     */
    LWIP_ASSERT1(pxdns->timeout_slot < TIMEOUT);
    if (++pxdns->timeout_slot == TIMEOUT) {
        pxdns->timeout_slot = 0;
    }

    chain = &pxdns->timeout_list[pxdns->timeout_slot];
    req = *chain;
    while (req != NULL) {
        struct request *expired = req;
        req = req->next_timeout;    /* fetch next before "expired" is freed */

        if (pxdns_rexmit(pxdns, expired)) {
            continue;
        }

        pxdns_hash_del(pxdns, expired);
        pxdns_timeout_del(pxdns, expired);
        --pxdns->active_queries;
        ++pxdns->expired_queries;

        pxdns_request_free(expired);
    }

    if (pxdns->timeout_list[pxdns->timeout_slot] == NULL) {
        pxdns->timeout_mask &= ~(1U << pxdns->timeout_slot);
    }
    else {
        pxdns->timeout_mask |= 1U << pxdns->timeout_slot;
    }
    mask = pxdns->timeout_mask;

    sys_mutex_unlock(&pxdns->lock);

    if (mask != 0) {
        sys_timeout(1 * 1000, pxdns_timer, pxdns);
    }
}


/**
 * lwIP receive callback for the IPv4 PCB; hands the query over to
 * pxdns_query().
 */
static void
pxdns_recv4(void *arg, struct udp_pcb *pcb, struct pbuf *p,
            ip_addr_t *addr, u16_t port)
{
    struct pxdns *pxdns = (struct pxdns *)arg;
    pxdns_query(pxdns, pcb, p, ip_2_ipX(addr), port);
}

/**
 * lwIP receive callback for the IPv6 PCB; hands the query over to
 * pxdns_query().
 */
static void
pxdns_recv6(void *arg, struct udp_pcb *pcb, struct pbuf *p,
            ip6_addr_t *addr, u16_t port)
{
    struct pxdns *pxdns = (struct pxdns *)arg;
    pxdns_query(pxdns, pcb, p, ip6_2_ipX(addr), port);
}


/**
 * Handle a query from the guest: copy it into a new request, replace
 * the client's id with ours and forward it to the first resolver that
 * accepts it.
 *
 * Takes ownership of the pbuf: it is freed on every path, since the
 * request keeps its own copy of the data.
 *
 * @param pxdns     DNS proxy state.
 * @param pcb       PCB the query arrived on (used for the reply).
 * @param p         Query datagram.
 * @param addr      Client address.
 * @param port      Client port.
 */
static void
pxdns_query(struct pxdns *pxdns, struct udp_pcb *pcb, struct pbuf *p,
            ipX_addr_t *addr, u16_t port)
{
    struct request *req;
    int sent;

    if (pxdns->nresolvers == 0) {
        /* nothing we can do */
        pbuf_free(p);
        return;
    }

    /* struct request already accounts for one byte of data[] */
    req = calloc(1, sizeof(struct request) - 1 + p->tot_len);
    if (req == NULL) {
        pbuf_free(p);
        return;
    }

    /* copy request data */
    req->size = p->tot_len;
    pbuf_copy_partial(p, req->data, p->tot_len, 0);

    /*
     * We work on our private copy from here on.  The receive callback
     * owns the pbuf, so release it now; not doing so leaks one pbuf
     * per accepted query.
     */
    pbuf_free(p);

    /* save client identity and client's request id */
    req->pcb = pcb;
    ipX_addr_copy(PCB_ISIPV6(pcb), req->client_addr, *addr);
    req->client_port = port;
    memcpy(&req->client_id, req->data, sizeof(req->client_id));

    /* slap our request id onto it */
    req->id = pxdns->id++;
    memcpy(req->data, &req->id, sizeof(u16_t));

    /* resolver to forward to */
    req->generation = pxdns->generation;
    req->residx = 0;

    /* prepare for relaying the reply back to guest */
    req->msg_reply.type = TCPIP_MSG_CALLBACK_STATIC;
    req->msg_reply.sem = NULL;
    req->msg_reply.msg.cb.function = pxdns_pcb_reply;
    req->msg_reply.msg.cb.ctx = (void *)req;

    DPRINTF2(("%s: req=%p: client id %d -> id %d\n",
              __func__, (void *)req, req->client_id, req->id));

    pxdns_request_register(pxdns, req);

    sent = pxdns_forward_outbound(pxdns, req);
    if (!sent) {
        /* first resolver failed, try the remaining ones right away */
        sent = pxdns_rexmit(pxdns, req);
    }
    if (!sent) {
        pxdns_request_deregister(pxdns, req);
        pxdns_request_free(req);
    }
}


/**
 * Forward request to the req::residx resolver in the pxdns::resolvers
 * array of upstream resolvers.
 *
 * Returns 1 on success, 0 on failure.
 */
static int
pxdns_forward_outbound(struct pxdns *pxdns, struct request *req)
{
    union sockaddr_inet *resolver;
    ssize_t nsent;

    DPRINTF2(("%s: req %p: sending to resolver #%lu\n",
              __func__, (void *)req, (unsigned long)req->residx));

    LWIP_ASSERT1(req->generation == pxdns->generation);
    LWIP_ASSERT1(req->residx < pxdns->nresolvers);
    resolver = &pxdns->resolvers[req->residx];

    if (resolver->sa.sa_family == AF_INET) {
        nsent = sendto(pxdns->sock4, req->data, req->size, 0,
                       &resolver->sa, sizeof(resolver->sin));

    }
    else if (resolver->sa.sa_family == AF_INET6) {
        if (pxdns->sock6 != INVALID_SOCKET) {
            nsent = sendto(pxdns->sock6, req->data, req->size, 0,
                           &resolver->sa, sizeof(resolver->sin6));
        }
        else {
            /* shouldn't happen, we should have weeded out IPv6 resolvers */
            return 0;
        }
    }
    else {
        /* shouldn't happen, we should have weeded out unsupported families */
        return 0;
    }

    if ((size_t)nsent == req->size) {
        return 1; /* sent */
    }

    if (nsent < 0) {
        DPRINTF2(("%s: send: errno %d\n", __func__, errno));
    }
    else {
        DPRINTF2(("%s: sent only %lu of %lu\n",
                  __func__, (unsigned long)nsent, (unsigned long)req->size));
    }
    return 0; /* not sent, caller will retry as necessary */
}


/**
 * Forward request to the next resolver in the pxdns::resolvers array
 * of upstream resolvers if there are any left.
 *
 * Returns 1 if the request was sent to some resolver, 0 if the
 * resolver list has changed since the request was created or if no
 * remaining resolver accepted it.
 */
static int
pxdns_rexmit(struct pxdns *pxdns, struct request *req)
{
    int sent;

    if (/* __predict_false */ req->generation != pxdns->generation) {
        DPRINTF2(("%s: req %p: generation %lu != pxdns generation %lu\n",
                  __func__, (void *)req,
                  (unsigned long)req->generation,
                  (unsigned long)pxdns->generation));
        return 0;
    }

    LWIP_ASSERT1(req->residx < pxdns->nresolvers);
    do {
        if (++req->residx == pxdns->nresolvers) {
            return 0;
        }

        sent = pxdns_forward_outbound(pxdns, req);
    } while (!sent);

    return 1;
}


/**
 * Poll manager callback for the host sockets: read one reply from an
 * upstream resolver, match it to the pending request by id, restore
 * the client's original id and post the reply to the lwIP thread.
 *
 * Always returns POLLIN to keep polling the socket.
 */
static int
pxdns_pmgr_pump(struct pollmgr_handler *handler, SOCKET fd, int revents)
{
    struct pxdns *pxdns;
    struct request *req;
    ssize_t nread;
    err_t error;
    u16_t id;

    pxdns = (struct pxdns *)handler->data;
    LWIP_ASSERT1(handler == &pxdns->pmhdl4 || handler == &pxdns->pmhdl6);
    LWIP_ASSERT1(fd == (handler == &pxdns->pmhdl4 ? pxdns->sock4 : pxdns->sock6));

    if (revents & ~(POLLIN|POLLERR)) {
        DPRINTF0(("%s: unexpected revents 0x%x\n", __func__, revents));
        return POLLIN;
    }

    if (revents & POLLERR) {
        int sockerr = -1;
        socklen_t optlen = (socklen_t)sizeof(sockerr);
        int status;

        /* fetch the pending socket error; it is only logged */
        status = getsockopt(fd, SOL_SOCKET,
                            SO_ERROR, (char *)&sockerr, &optlen);
        if (status < 0) {
            DPRINTF(("%s: sock %d: SO_ERROR failed with errno %d\n",
                     __func__, fd, errno));
        }
        else {
            DPRINTF(("%s: sock %d: errno %d\n",
                     __func__, fd, sockerr));
        }
    }

    if ((revents & POLLIN) == 0) {
        return POLLIN;
    }


    nread = recv(fd, pollmgr_udpbuf, sizeof(pollmgr_udpbuf), 0);
    if (nread < 0) {
        perror(__func__);
        return POLLIN;
    }

    /* check for minimum dns packet length */
    if (nread < 12) {
        DPRINTF2(("%s: short reply %lu bytes\n",
                  __func__, (unsigned long)nread));
        return POLLIN;
    }

    /* XXX: shall we proxy back RCODE=Refused responses? */

    memcpy(&id, pollmgr_udpbuf, sizeof(id));
    req = pxdns_request_find(pxdns, id);
    if (req == NULL) {
        DPRINTF2(("%s: orphaned reply for %d\n", __func__, id));
        ++pxdns->late_answers;
        return POLLIN;
    }

    DPRINTF2(("%s: reply for req=%p: id %d -> client id %d\n",
              __func__, (void *)req, req->id, req->client_id));

    req->reply = pbuf_alloc(PBUF_RAW, nread, PBUF_RAM);
    if (req->reply == NULL) {
        DPRINTF(("%s: pbuf_alloc(%d) failed\n", __func__, (int)nread));
        pxdns_request_free(req);
        return POLLIN;
    }

    /* put the client's original id back before copying into the pbuf */
    memcpy(pollmgr_udpbuf, &req->client_id, sizeof(req->client_id));
    error = pbuf_take(req->reply, pollmgr_udpbuf, nread);
    if (error != ERR_OK) {
        DPRINTF(("%s: pbuf_take(%d) failed\n", __func__, (int)nread));
        pxdns_request_free(req);
        return POLLIN;
    }

    /* the request is now owned by the posted callback */
    proxy_lwip_post(&req->msg_reply);
    return POLLIN;
}


/**
 * Called on lwIP thread via request::msg_reply callback.
+ */ +static void +pxdns_pcb_reply(void *ctx) +{ + struct request *req = (struct request *)ctx; + err_t error; + + error = udp_sendto(req->pcb, req->reply, + ipX_2_ip(&req->client_addr), req->client_port); + if (error != ERR_OK) { + DPRINTF(("%s: udp_sendto err %s\n", + __func__, proxy_lwip_strerr(error))); + } + + pxdns_request_free(req); +} diff --git a/src/VBox/NetworkServices/NAT/pxping.c b/src/VBox/NetworkServices/NAT/pxping.c new file mode 100644 index 00000000..0c3672d5 --- /dev/null +++ b/src/VBox/NetworkServices/NAT/pxping.c @@ -0,0 +1,1937 @@ +/* -*- indent-tabs-mode: nil; -*- */ + +#include "winutils.h" +#include "proxy.h" +#include "proxy_pollmgr.h" +#include "pxremap.h" + +#ifndef RT_OS_WINDOWS +#include <sys/types.h> +#include <sys/socket.h> +#ifdef RT_OS_DARWIN +# define __APPLE_USE_RFC_3542 +#endif +#include <netinet/in.h> +#include <arpa/inet.h> /* XXX: inet_ntop */ +#include <poll.h> +#include <stdint.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#else +#include <iprt/stdint.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include "winpoll.h" +#endif + +#include "lwip/opt.h" + +#include "lwip/sys.h" +#include "lwip/tcpip.h" +#include "lwip/inet_chksum.h" +#include "lwip/ip.h" +#include "lwip/icmp.h" + +#if defined(RT_OS_LINUX) && !defined(__USE_GNU) +#if __GLIBC_PREREQ(2, 8) +/* + * XXX: This is gross. in6_pktinfo is now hidden behind _GNU_SOURCE + * https://sourceware.org/bugzilla/show_bug.cgi?id=6775 + * + * But in older glibc versions, e.g. RHEL5, it is not! I don't want + * to deal with _GNU_SOURCE now, so as a kludge check for glibc + * version. It seems the __USE_GNU guard was introduced in 2.8. + */ +struct in6_pktinfo { + struct in6_addr ipi6_addr; + unsigned int ipi6_ifindex; +}; +#endif /* __GLIBC_PREREQ */ +#endif /* RT_OS_LINUX && !__USE_GNU */ + + +/* forward */ +struct ping_pcb; + + +/** + * Global state for ping proxy collected in one entity to minimize + * globals. 
There's only one instance of this structure. + * + * Raw ICMP sockets are promiscuous, so it doesn't make sense to have + * multiple. If this code ever needs to support multiple netifs, the + * netif member should be exiled into "pcb". + */ +struct pxping { + SOCKET sock4; + +#if defined(RT_OS_DARWIN) || defined(RT_OS_SOLARIS) +# define DF_WITH_IP_HDRINCL + int hdrincl; +#else + int df; +#endif + int ttl; + int tos; + + SOCKET sock6; +#ifdef RT_OS_WINDOWS + LPFN_WSARECVMSG pfWSARecvMsg6; +#endif + int hopl; + + struct pollmgr_handler pmhdl4; + struct pollmgr_handler pmhdl6; + + struct netif *netif; + + /** + * Protect lwIP and pmgr accesses to the list of pcbs. + */ + sys_mutex_t lock; + + /* + * We need to find pcbs both from the guest side and from the host + * side. If we need to support industrial grade ping throughput, + * we will need two pcb hashes. For now, a short linked list + * should be enough. Cf. pxping_pcb_for_request() and + * pxping_pcb_for_reply(). + */ +#define PXPING_MAX_PCBS 8 + size_t npcbs; + struct ping_pcb *pcbs; + +#define TIMEOUT 5 + int timer_active; + size_t timeout_slot; + struct ping_pcb *timeout_list[TIMEOUT]; +}; + + +/** + * Quasi PCB for ping. + */ +struct ping_pcb { + ipX_addr_t src; + ipX_addr_t dst; + + u8_t is_ipv6; + u8_t is_mapped; + + u16_t guest_id; + u16_t host_id; + + /** + * Desired slot in pxping::timeout_list. See pxping_timer(). + */ + size_t timeout_slot; + + /** + * Chaining for pxping::timeout_list + */ + struct ping_pcb **pprev_timeout; + struct ping_pcb *next_timeout; + + /** + * Chaining for pxping::pcbs + */ + struct ping_pcb *next; + + union { + struct sockaddr_in sin; + struct sockaddr_in6 sin6; + } peer; +}; + + +/** + * lwIP thread callback message for IPv4 ping. + * + * We pass raw IP datagram for ip_output_if() so we only need pbuf and + * netif (from pxping). 
+ */ +struct ping_msg { + struct tcpip_msg msg; + struct pxping *pxping; + struct pbuf *p; +}; + + +/** + * lwIP thread callback message for IPv6 ping. + * + * We cannot obtain raw IPv6 datagram from host without extra trouble, + * so we pass ICMPv6 payload in pbuf and also other parameters to + * ip6_output_if(). + */ +struct ping6_msg { + struct tcpip_msg msg; + struct pxping *pxping; + struct pbuf *p; + ip6_addr_t src, dst; + int hopl, tclass; +}; + + +#ifdef RT_OS_WINDOWS +static int pxping_init_windows(struct pxping *pxping); +#endif +static void pxping_recv4(void *arg, struct pbuf *p); +static void pxping_recv6(void *arg, struct pbuf *p); + +static void pxping_timer(void *arg); +static void pxping_timer_needed(struct pxping *pxping); + +static struct ping_pcb *pxping_pcb_for_request(struct pxping *pxping, + int is_ipv6, + ipX_addr_t *src, ipX_addr_t *dst, + u16_t guest_id); +static struct ping_pcb *pxping_pcb_for_reply(struct pxping *pxping, int is_ipv6, + ipX_addr_t *dst, u16_t host_id); + +static struct ping_pcb *pxping_pcb_allocate(struct pxping *pxping); +static void pxping_pcb_register(struct pxping *pxping, struct ping_pcb *pcb); +static void pxping_pcb_deregister(struct pxping *pxping, struct ping_pcb *pcb); +static void pxping_pcb_delete(struct pxping *pxping, struct ping_pcb *pcb); +static void pxping_timeout_add(struct pxping *pxping, struct ping_pcb *pcb); +static void pxping_timeout_del(struct pxping *pxping, struct ping_pcb *pcb); +static void pxping_pcb_debug_print(struct ping_pcb *pcb); + +static int pxping_pmgr_pump(struct pollmgr_handler *handler, SOCKET fd, int revents); + +static void pxping_pmgr_icmp4(struct pxping *pxping); +static void pxping_pmgr_icmp4_echo(struct pxping *pxping, + u16_t iplen, struct sockaddr_in *peer); +static void pxping_pmgr_icmp4_error(struct pxping *pxping, + u16_t iplen, struct sockaddr_in *peer); +static void pxping_pmgr_icmp6(struct pxping *pxping); +static void pxping_pmgr_icmp6_echo(struct pxping *pxping, + 
ip6_addr_t *src, ip6_addr_t *dst, + int hopl, int tclass, u16_t icmplen); +static void pxping_pmgr_icmp6_error(struct pxping *pxping, + ip6_addr_t *src, ip6_addr_t *dst, + int hopl, int tclass, u16_t icmplen); + +static void pxping_pmgr_forward_inbound(struct pxping *pxping, u16_t iplen); +static void pxping_pcb_forward_inbound(void *arg); + +static void pxping_pmgr_forward_inbound6(struct pxping *pxping, + ip6_addr_t *src, ip6_addr_t *dst, + u8_t hopl, u8_t tclass, + u16_t icmplen); +static void pxping_pcb_forward_inbound6(void *arg); + +/* + * NB: This is not documented except in RTFS. + * + * If ip_output_if() is passed dest == NULL then it treats p as + * complete IP packet with payload pointing to the IP header. It does + * not build IP header, ignores all header-related arguments, fetches + * real destination from the header in the pbuf and outputs pbuf to + * the specified netif. + */ +#define ip_raw_output_if(p, netif) \ + (ip_output_if((p), NULL, NULL, 0, 0, 0, (netif))) + + + +static struct pxping g_pxping; + + +err_t +pxping_init(struct netif *netif, SOCKET sock4, SOCKET sock6) +{ + const int on = 1; + int status; + + if (sock4 == INVALID_SOCKET && sock6 == INVALID_SOCKET) { + return ERR_VAL; + } + + g_pxping.netif = netif; + sys_mutex_new(&g_pxping.lock); + + g_pxping.sock4 = sock4; + if (g_pxping.sock4 != INVALID_SOCKET) { +#ifdef DF_WITH_IP_HDRINCL + g_pxping.hdrincl = -1; +#else + g_pxping.df = -1; +#endif + g_pxping.ttl = -1; + g_pxping.tos = 0; + +#ifdef RT_OS_LINUX + { + const int dont = IP_PMTUDISC_DONT; + status = setsockopt(sock4, IPPROTO_IP, IP_MTU_DISCOVER, + &dont, sizeof(dont)); + if (status != 0) { + perror("IP_MTU_DISCOVER"); + } + } +#endif /* RT_OS_LINUX */ + + g_pxping.pmhdl4.callback = pxping_pmgr_pump; + g_pxping.pmhdl4.data = (void *)&g_pxping; + g_pxping.pmhdl4.slot = -1; + pollmgr_add(&g_pxping.pmhdl4, g_pxping.sock4, POLLIN); + + ping_proxy_accept(pxping_recv4, &g_pxping); + } + + g_pxping.sock6 = sock6; +#ifdef RT_OS_WINDOWS + 
/* we need recvmsg */ + if (g_pxping.sock6 != INVALID_SOCKET) { + status = pxping_init_windows(&g_pxping); + if (status == SOCKET_ERROR) { + g_pxping.sock6 = INVALID_SOCKET; + /* close(sock6); */ + } + } +#endif + if (g_pxping.sock6 != INVALID_SOCKET) { + g_pxping.hopl = -1; + +#if !defined(IPV6_RECVPKTINFO) +#define IPV6_RECVPKTINFO (IPV6_PKTINFO) +#endif + status = setsockopt(sock6, IPPROTO_IPV6, IPV6_RECVPKTINFO, + (const char *)&on, sizeof(on)); + if (status < 0) { + perror("IPV6_RECVPKTINFO"); + /* XXX: for now this is fatal */ + } + +#if !defined(IPV6_RECVHOPLIMIT) +#define IPV6_RECVHOPLIMIT (IPV6_HOPLIMIT) +#endif + status = setsockopt(sock6, IPPROTO_IPV6, IPV6_RECVHOPLIMIT, + (const char *)&on, sizeof(on)); + if (status < 0) { + perror("IPV6_RECVHOPLIMIT"); + } + +#ifdef IPV6_RECVTCLASS /* new in RFC 3542, there's no RFC 2292 counterpart */ + /* TODO: IPV6_RECVTCLASS */ +#endif + + g_pxping.pmhdl6.callback = pxping_pmgr_pump; + g_pxping.pmhdl6.data = (void *)&g_pxping; + g_pxping.pmhdl6.slot = -1; + pollmgr_add(&g_pxping.pmhdl6, g_pxping.sock6, POLLIN); + + ping6_proxy_accept(pxping_recv6, &g_pxping); + } + + return ERR_OK; +} + + +#ifdef RT_OS_WINDOWS +static int +pxping_init_windows(struct pxping *pxping) +{ + GUID WSARecvMsgGUID = WSAID_WSARECVMSG; + DWORD nread; + int status; + + pxping->pfWSARecvMsg6 = NULL; + status = WSAIoctl(pxping->sock6, + SIO_GET_EXTENSION_FUNCTION_POINTER, + &WSARecvMsgGUID, sizeof(WSARecvMsgGUID), + &pxping->pfWSARecvMsg6, sizeof(pxping->pfWSARecvMsg6), + &nread, + NULL, NULL); + return status; +} +#endif /* RT_OS_WINDOWS */ + + +static u32_t +chksum_delta_16(u16_t oval, u16_t nval) +{ + u32_t sum = (u16_t)~oval; + sum += nval; + return sum; +} + + +static u32_t +chksum_update_16(u16_t *oldp, u16_t nval) +{ + u32_t sum = chksum_delta_16(*oldp, nval); + *oldp = nval; + return sum; +} + + +static u32_t +chksum_delta_32(u32_t oval, u32_t nval) +{ + u32_t sum = ~oval; + sum = FOLD_U32T(sum); + sum += FOLD_U32T(nval); + return sum; 
+} + + +static u32_t +chksum_update_32(u32_t *oldp, u32_t nval) +{ + u32_t sum = chksum_delta_32(*oldp, nval); + *oldp = nval; + return sum; +} + + +static u32_t +chksum_delta_ipv6(const ip6_addr_t *oldp, const ip6_addr_t *newp) +{ + u32_t sum; + + sum = chksum_delta_32(oldp->addr[0], newp->addr[0]); + sum += chksum_delta_32(oldp->addr[1], newp->addr[1]); + sum += chksum_delta_32(oldp->addr[2], newp->addr[2]); + sum += chksum_delta_32(oldp->addr[3], newp->addr[3]); + + return sum; +} + + +static u32_t +chksum_update_ipv6(ip6_addr_t *oldp, const ip6_addr_t *newp) +{ + u32_t sum; + + sum = chksum_update_32(&oldp->addr[0], newp->addr[0]); + sum += chksum_update_32(&oldp->addr[1], newp->addr[1]); + sum += chksum_update_32(&oldp->addr[2], newp->addr[2]); + sum += chksum_update_32(&oldp->addr[3], newp->addr[3]); + + return sum; +} + + +/** + * ICMP Echo Request in pbuf "p" is to be proxied. + */ +static void +pxping_recv4(void *arg, struct pbuf *p) +{ + struct pxping *pxping = (struct pxping *)arg; + struct ping_pcb *pcb; +#ifdef DF_WITH_IP_HDRINCL + struct ip_hdr iph_orig; +#endif + struct icmp_echo_hdr icmph_orig; + struct ip_hdr *iph; + struct icmp_echo_hdr *icmph; + int df, ttl, tos; + u32_t sum; + u16_t iphlen; + int status; + + iphlen = ip_current_header_tot_len(); + if (iphlen != IP_HLEN) { /* we don't do options */ + pbuf_free(p); + return; + } + + iph = (/* UNCONST */ struct ip_hdr *)ip_current_header(); + icmph = (struct icmp_echo_hdr *)p->payload; + + pcb = pxping_pcb_for_request(pxping, 0, + ipX_current_src_addr(), + ipX_current_dest_addr(), + icmph->id); + if (pcb == NULL) { + pbuf_free(p); + return; + } + + pxping_pcb_debug_print(pcb); /* XXX */ + DPRINTF((" seq %d len %u ttl %d\n", + ntohs(icmph->seqno), (unsigned int)p->tot_len, + IPH_TTL(iph))); + + ttl = IPH_TTL(iph); + if (!pcb->is_mapped) { + if (RT_UNLIKELY(ttl == 1)) { + status = pbuf_header(p, iphlen); /* back to IP header */ + if (RT_LIKELY(status == 0)) { + icmp_time_exceeded(p, ICMP_TE_TTL); + } 
+ pbuf_free(p); + return; + } + --ttl; + } + + /* + * OS X doesn't provide a socket option to control fragmentation. + * Solaris doesn't provide IP_DONTFRAG on all releases we support. + * In this case we have to use IP_HDRINCL. We don't want to use + * it always since it doesn't handle fragmentation (but that's ok + * for DF) and Windows doesn't do automatic source address + * selection with IP_HDRINCL. + */ + df = (IPH_OFFSET(iph) & PP_HTONS(IP_DF)) != 0; + +#ifdef DF_WITH_IP_HDRINCL + if (df != pxping->hdrincl) { + status = setsockopt(pxping->sock4, IPPROTO_IP, IP_HDRINCL, + &df, sizeof(df)); + if (RT_LIKELY(status == 0)) { + pxping->hdrincl = df; + } + else { + perror("IP_HDRINCL"); + } + } + + if (pxping->hdrincl) { + status = pbuf_header(p, iphlen); /* back to IP header */ + if (RT_UNLIKELY(status != 0)) { + pbuf_free(p); + return; + } + + /* we will overwrite IP header, save original for ICMP errors */ + memcpy(&iph_orig, iph, iphlen); + + if (g_proxy_options->src4 != NULL) { + memcpy(&iph->src, &g_proxy_options->src4->sin_addr, + sizeof(g_proxy_options->src4->sin_addr)); + } + else { + /* let the kernel select suitable source address */ + memset(&iph->src, 0, sizeof(iph->src)); + } + + IPH_TTL_SET(iph, ttl); /* already decremented */ + IPH_ID_SET(iph, 0); /* kernel will set one */ +#ifdef RT_OS_DARWIN + /* wants ip_offset and ip_len fields in host order */ + IPH_OFFSET_SET(iph, ntohs(IPH_OFFSET(iph))); + IPH_LEN_SET(iph, ntohs(IPH_LEN(iph))); + /* wants checksum of everything (sic!), in host order */ + sum = inet_chksum_pbuf(p); + IPH_CHKSUM_SET(iph, sum); +#else /* !RT_OS_DARWIN */ + IPH_CHKSUM_SET(iph, 0); /* kernel will recalculate */ +#endif + } + else /* !pxping->hdrincl */ +#endif /* DF_WITH_IP_HDRINCL */ + { +#if !defined(DF_WITH_IP_HDRINCL) + /* control DF flag via setsockopt(2) */ +#define USE_DF_OPTION(_Optname) \ + const int dfopt = _Optname; \ + const char * const dfoptname = #_Optname; +#if defined(RT_OS_LINUX) + USE_DF_OPTION(IP_MTU_DISCOVER); 
+ df = df ? IP_PMTUDISC_DO : IP_PMTUDISC_DONT; +#elif defined(RT_OS_SOLARIS) || defined(RT_OS_FREEBSD) + USE_DF_OPTION(IP_DONTFRAG); +#elif defined(RT_OS_WINDOWS) + USE_DF_OPTION(IP_DONTFRAGMENT); +#endif + if (df != pxping->df) { + status = setsockopt(pxping->sock4, IPPROTO_IP, dfopt, + (char *)&df, sizeof(df)); + if (RT_LIKELY(status == 0)) { + pxping->df = df; + } + else { + perror(dfoptname); + } + } +#endif /* !DF_WITH_IP_HDRINCL */ + + if (ttl != pxping->ttl) { + status = setsockopt(pxping->sock4, IPPROTO_IP, IP_TTL, + (char *)&ttl, sizeof(ttl)); + if (RT_LIKELY(status == 0)) { + pxping->ttl = ttl; + } + else { + perror("IP_TTL"); + } + } + + tos = IPH_TOS(iph); + if (tos != pxping->tos) { + status = setsockopt(pxping->sock4, IPPROTO_IP, IP_TOS, + (char *)&tos, sizeof(tos)); + if (RT_LIKELY(status == 0)) { + pxping->tos = tos; + } + else { + perror("IP_TOS"); + } + } + } + + /* rewrite ICMP echo header */ + memcpy(&icmph_orig, icmph, sizeof(*icmph)); + sum = (u16_t)~icmph->chksum; + sum += chksum_update_16(&icmph->id, pcb->host_id); + sum = FOLD_U32T(sum); + icmph->chksum = ~sum; + + status = proxy_sendto(pxping->sock4, p, + &pcb->peer.sin, sizeof(pcb->peer.sin)); + if (status != 0) { + int error = -status; + DPRINTF(("%s: sendto errno %d\n", __func__, error)); + +#ifdef DF_WITH_IP_HDRINCL + if (pxping->hdrincl) { + /* restore original IP header */ + memcpy(iph, &iph_orig, iphlen); + } + else +#endif + { + status = pbuf_header(p, iphlen); /* back to IP header */ + if (RT_UNLIKELY(status != 0)) { + pbuf_free(p); + return; + } + } + + /* restore original ICMP header */ + memcpy(icmph, &icmph_orig, sizeof(*icmph)); + + /* + * Some ICMP errors may be generated by the kernel and we read + * them from the socket and forward them normally, hence the + * ifdefs below. 
+ */ + switch (error) { + +#if !( defined(RT_OS_SOLARIS) \ + || (defined(RT_OS_LINUX) && !defined(DF_WITH_IP_HDRINCL)) \ + ) + case EMSGSIZE: + icmp_dest_unreach(p, ICMP_DUR_FRAG); + break; +#endif + + case ENETDOWN: + case ENETUNREACH: + icmp_dest_unreach(p, ICMP_DUR_NET); + break; + + case EHOSTDOWN: + case EHOSTUNREACH: + icmp_dest_unreach(p, ICMP_DUR_HOST); + break; + } + } + + pbuf_free(p); +} + + +/** + * ICMPv6 Echo Request in pbuf "p" is to be proxied. + */ +static void +pxping_recv6(void *arg, struct pbuf *p) +{ + struct pxping *pxping = (struct pxping *)arg; + struct ping_pcb *pcb; + struct ip6_hdr *iph; + struct icmp6_echo_hdr *icmph; + int hopl; + u16_t iphlen; + u16_t id, seq; + int status; + + iph = (/* UNCONST */ struct ip6_hdr *)ip6_current_header(); + iphlen = ip_current_header_tot_len(); + + icmph = (struct icmp6_echo_hdr *)p->payload; + + id = icmph->id; + seq = icmph->seqno; + + pcb = pxping_pcb_for_request(pxping, 1, + ipX_current_src_addr(), + ipX_current_dest_addr(), + id); + if (pcb == NULL) { + pbuf_free(p); + return; + } + + pxping_pcb_debug_print(pcb); /* XXX */ + DPRINTF((" seq %d len %u hopl %d\n", + ntohs(seq), (unsigned int)p->tot_len, + IP6H_HOPLIM(iph))); + + hopl = IP6H_HOPLIM(iph); + if (!pcb->is_mapped) { + if (hopl == 1) { + status = pbuf_header(p, iphlen); /* back to IP header */ + if (RT_LIKELY(status == 0)) { + icmp6_time_exceeded(p, ICMP6_TE_HL); + } + pbuf_free(p); + return; + } + --hopl; + } + + /* + * Rewrite ICMPv6 echo header. We don't need to recompute the + * checksum since, unlike IPv4, checksum includes pseudo-header. + * OS computes checksum for us on send() since it needs to select + * source address. + */ + icmph->id = pcb->host_id; + + /* TODO: use control messages to save a syscall? 
*/ + if (hopl != pxping->hopl) { + status = setsockopt(pxping->sock6, IPPROTO_IPV6, IPV6_UNICAST_HOPS, + (char *)&hopl, sizeof(hopl)); + if (status == 0) { + pxping->hopl = hopl; + } + else { + perror("IPV6_HOPLIMIT"); + } + } + + status = proxy_sendto(pxping->sock6, p, + &pcb->peer.sin6, sizeof(pcb->peer.sin6)); + if (status != 0) { + int error = -status; + DPRINTF(("%s: sendto errno %d\n", __func__, error)); + + status = pbuf_header(p, iphlen); /* back to IP header */ + if (RT_UNLIKELY(status != 0)) { + pbuf_free(p); + return; + } + + /* restore original ICMP header */ + icmph->id = pcb->guest_id; + + switch (error) { + case EACCES: + icmp6_dest_unreach(p, ICMP6_DUR_PROHIBITED); + break; + +#ifdef ENONET + case ENONET: +#endif + case ENETDOWN: + case ENETUNREACH: + case EHOSTDOWN: + case EHOSTUNREACH: + icmp6_dest_unreach(p, ICMP6_DUR_NO_ROUTE); + break; + } + } + + pbuf_free(p); +} + + +static void +pxping_pcb_debug_print(struct ping_pcb *pcb) +{ + char addrbuf[sizeof "ffff:ffff:ffff:ffff:ffff:ffff:255.255.255.255"]; + const char *addrstr; + int sdom = pcb->is_ipv6 ? 
AF_INET6 : AF_INET; + + DPRINTF(("ping %p:", (void *)pcb)); + + addrstr = inet_ntop(sdom, (void *)&pcb->src, addrbuf, sizeof(addrbuf)); + DPRINTF((" %s", addrstr)); + + DPRINTF((" ->")); + + addrstr = inet_ntop(sdom, (void *)&pcb->dst, addrbuf, sizeof(addrbuf)); + DPRINTF((" %s", addrstr)); + + DPRINTF((" id %04x->%04x", ntohs(pcb->guest_id), ntohs(pcb->host_id))); +} + + +static struct ping_pcb * +pxping_pcb_allocate(struct pxping *pxping) +{ + struct ping_pcb *pcb; + + if (pxping->npcbs >= PXPING_MAX_PCBS) { + return NULL; + } + + pcb = (struct ping_pcb *)malloc(sizeof(*pcb)); + if (pcb == NULL) { + return NULL; + } + + ++pxping->npcbs; + return pcb; +} + + +static void +pxping_pcb_delete(struct pxping *pxping, struct ping_pcb *pcb) +{ + LWIP_ASSERT1(pxping->npcbs > 0); + LWIP_ASSERT1(pcb->next == NULL); + LWIP_ASSERT1(pcb->pprev_timeout == NULL); + + DPRINTF(("%s: ping %p\n", __func__, (void *)pcb)); + + --pxping->npcbs; + free(pcb); +} + + +static void +pxping_timeout_add(struct pxping *pxping, struct ping_pcb *pcb) +{ + struct ping_pcb **chain; + + LWIP_ASSERT1(pcb->pprev_timeout == NULL); + + chain = &pxping->timeout_list[pcb->timeout_slot]; + if ((pcb->next_timeout = *chain) != NULL) { + (*chain)->pprev_timeout = &pcb->next_timeout; + } + *chain = pcb; + pcb->pprev_timeout = chain; +} + + +static void +pxping_timeout_del(struct pxping *pxping, struct ping_pcb *pcb) +{ + LWIP_UNUSED_ARG(pxping); + + LWIP_ASSERT1(pcb->pprev_timeout != NULL); + if (pcb->next_timeout != NULL) { + pcb->next_timeout->pprev_timeout = pcb->pprev_timeout; + } + *pcb->pprev_timeout = pcb->next_timeout; + pcb->pprev_timeout = NULL; + pcb->next_timeout = NULL; +} + + +static void +pxping_pcb_register(struct pxping *pxping, struct ping_pcb *pcb) +{ + pcb->next = pxping->pcbs; + pxping->pcbs = pcb; + + pxping_timeout_add(pxping, pcb); +} + + +static void +pxping_pcb_deregister(struct pxping *pxping, struct ping_pcb *pcb) +{ + struct ping_pcb **p; + + for (p = &pxping->pcbs; *p != NULL; p 
= &(*p)->next) { + if (*p == pcb) { + *p = pcb->next; + pcb->next = NULL; + break; + } + } + + pxping_timeout_del(pxping, pcb); +} + + +/** + * Find the pcb that tracks the guest's ping identified by (src, dst, + * guest_id), creating and registering a new one with a random host + * id when there is no match. For an existing pcb only its + * timeout_slot is refreshed. Returns NULL if a pcb cannot be + * allocated or the destination cannot be remapped. + */ +static struct ping_pcb * +pxping_pcb_for_request(struct pxping *pxping, + int is_ipv6, ipX_addr_t *src, ipX_addr_t *dst, + u16_t guest_id) +{ + struct ping_pcb *pcb; + + /* on lwip thread, so no concurrent updates */ + for (pcb = pxping->pcbs; pcb != NULL; pcb = pcb->next) { + if (pcb->guest_id == guest_id + && pcb->is_ipv6 == is_ipv6 + && ipX_addr_cmp(is_ipv6, &pcb->dst, dst) + && ipX_addr_cmp(is_ipv6, &pcb->src, src)) + { + break; + } + } + + if (pcb == NULL) { + int mapped; + + pcb = pxping_pcb_allocate(pxping); + if (pcb == NULL) { + return NULL; + } + + pcb->is_ipv6 = is_ipv6; + ipX_addr_copy(is_ipv6, pcb->src, *src); + ipX_addr_copy(is_ipv6, pcb->dst, *dst); + + pcb->guest_id = guest_id; +#ifdef RT_OS_WINDOWS +# define random() (rand()) +#endif + pcb->host_id = random() & 0xffffUL; + + pcb->pprev_timeout = NULL; + pcb->next_timeout = NULL; + + /* + * The pcb comes from malloc() and peer is later passed to + * proxy_sendto(), so don't leave fields we don't set below + * (e.g. sin6_scope_id) uninitialized. + */ + memset(&pcb->peer, 0, sizeof(pcb->peer)); + + if (is_ipv6) { + pcb->peer.sin6.sin6_family = AF_INET6; +#if HAVE_SA_LEN + pcb->peer.sin6.sin6_len = sizeof(pcb->peer.sin6); +#endif + pcb->peer.sin6.sin6_port = htons(IPPROTO_ICMPV6); + pcb->peer.sin6.sin6_flowinfo = 0; + mapped = pxremap_outbound_ip6((ip6_addr_t *)&pcb->peer.sin6.sin6_addr, + ipX_2_ip6(&pcb->dst)); + } + else { + pcb->peer.sin.sin_family = AF_INET; +#if HAVE_SA_LEN + pcb->peer.sin.sin_len = sizeof(pcb->peer.sin); +#endif + pcb->peer.sin.sin_port = htons(IPPROTO_ICMP); + mapped = pxremap_outbound_ip4((ip_addr_t *)&pcb->peer.sin.sin_addr, + ipX_2_ip(&pcb->dst)); + } + + if (mapped == PXREMAP_FAILED) { + /* + * Undo the accounting done by pxping_pcb_allocate(). We + * can't use pxping_pcb_delete() here: it asserts on + * pcb->next, which is not set until the pcb is registered. + */ + --pxping->npcbs; + free(pcb); + return NULL; + } + else { + pcb->is_mapped = (mapped == PXREMAP_MAPPED); + } + + pcb->timeout_slot = pxping->timeout_slot; + + sys_mutex_lock(&pxping->lock); + pxping_pcb_register(pxping, pcb); + sys_mutex_unlock(&pxping->lock); + + pxping_pcb_debug_print(pcb); /* XXX */ + DPRINTF((" - created\n")); + + pxping_timer_needed(pxping); + } + else { + /* just 
bump up expiration timeout lazily */ + pxping_pcb_debug_print(pcb); /* XXX */ + DPRINTF((" - slot %d -> %d\n", + (unsigned int)pcb->timeout_slot, + (unsigned int)pxping->timeout_slot)); + pcb->timeout_slot = pxping->timeout_slot; + } + + return pcb; +} + + +/** + * Called on pollmgr thread. Caller must do the locking since caller + * is going to use the returned pcb, which needs to be protected from + * being expired by pxping_timer() on lwip thread. + */ +static struct ping_pcb * +pxping_pcb_for_reply(struct pxping *pxping, + int is_ipv6, ipX_addr_t *dst, u16_t host_id) +{ + struct ping_pcb *pcb; + + for (pcb = pxping->pcbs; pcb != NULL; pcb = pcb->next) { + if (pcb->host_id == host_id + && pcb->is_ipv6 == is_ipv6 + /* XXX: allow broadcast pings? */ + && ipX_addr_cmp(is_ipv6, &pcb->dst, dst)) + { + return pcb; + } + } + + return NULL; +} + + +static void +pxping_timer(void *arg) +{ + struct pxping *pxping = (struct pxping *)arg; + struct ping_pcb **chain, *pcb; + + pxping->timer_active = 0; + + /* + * New slot points to the list of pcbs to check for expiration. + */ + LWIP_ASSERT1(pxping->timeout_slot < TIMEOUT); + if (++pxping->timeout_slot == TIMEOUT) { + pxping->timeout_slot = 0; + } + + chain = &pxping->timeout_list[pxping->timeout_slot]; + pcb = *chain; + + /* protect from pollmgr concurrent reads */ + sys_mutex_lock(&pxping->lock); + + while (pcb != NULL) { + struct ping_pcb *xpcb = pcb; + pcb = pcb->next_timeout; + + if (xpcb->timeout_slot == pxping->timeout_slot) { + /* expired */ + pxping_pcb_deregister(pxping, xpcb); + pxping_pcb_delete(pxping, xpcb); + } + else { + /* + * If there was another request, we updated timeout_slot + * but delayed actually moving the pcb until now. 
+ */ + pxping_timeout_del(pxping, xpcb); /* from current slot */ + pxping_timeout_add(pxping, xpcb); /* to new slot */ + } + } + + sys_mutex_unlock(&pxping->lock); + pxping_timer_needed(pxping); +} + + +static void +pxping_timer_needed(struct pxping *pxping) +{ + if (!pxping->timer_active && pxping->pcbs != NULL) { + pxping->timer_active = 1; + sys_timeout(1 * 1000, pxping_timer, pxping); + } +} + + +static int +pxping_pmgr_pump(struct pollmgr_handler *handler, SOCKET fd, int revents) +{ + struct pxping *pxping; + + pxping = (struct pxping *)handler->data; + LWIP_ASSERT1(fd == pxping->sock4 || fd == pxping->sock6); + + if (revents & ~(POLLIN|POLLERR)) { + DPRINTF0(("%s: unexpected revents 0x%x\n", __func__, revents)); + return POLLIN; + } + + if (revents & POLLERR) { + int sockerr = -1; + socklen_t optlen = (socklen_t)sizeof(sockerr); + int status; + + status = getsockopt(fd, SOL_SOCKET, + SO_ERROR, (char *)&sockerr, &optlen); + if (status < 0) { + DPRINTF(("%s: sock %d: SO_ERROR failed with errno %d\n", + __func__, fd, errno)); + } + else { + DPRINTF(("%s: sock %d: errno %d\n", + __func__, fd, sockerr)); + } + } + + if ((revents & POLLIN) == 0) { + return POLLIN; + } + + if (fd == pxping->sock4) { + pxping_pmgr_icmp4(pxping); + } + else /* fd == pxping->sock6 */ { + pxping_pmgr_icmp6(pxping); + } + + return POLLIN; +} + + +/** + * Process incoming ICMP message for the host. + * NB: we will get a lot of spam here and have to sift through it. + */ +static void +pxping_pmgr_icmp4(struct pxping *pxping) +{ + struct sockaddr_in sin; + socklen_t salen = sizeof(sin); + ssize_t nread; + struct ip_hdr *iph; + struct icmp_echo_hdr *icmph; + u16_t iplen; + + memset(&sin, 0, sizeof(sin)); + + /* + * Reads from raw IPv4 sockets deliver complete IP datagrams with + * IP header included. 
+ */ + nread = recvfrom(pxping->sock4, pollmgr_udpbuf, sizeof(pollmgr_udpbuf), 0, + (struct sockaddr *)&sin, &salen); + if (nread < 0) { + perror(__func__); + return; + } + + if (nread < IP_HLEN) { + DPRINTF2(("%s: read %d bytes, IP header truncated\n", + __func__, (unsigned int)nread)); + return; + } + + iph = (struct ip_hdr *)pollmgr_udpbuf; + + /* match version */ + if (IPH_V(iph) != 4) { + DPRINTF2(("%s: unexpected IP version %d\n", __func__, IPH_V(iph))); + return; + } + + /* no fragmentation */ + if ((IPH_OFFSET(iph) & PP_HTONS(IP_OFFMASK | IP_MF)) != 0) { + DPRINTF2(("%s: dropping fragmented datagram\n", __func__)); + return; + } + + /* no options */ + if (IPH_HL(iph) * 4 != IP_HLEN) { + DPRINTF2(("%s: dropping datagram with options (IP header length %d)\n", + __func__, IPH_HL(iph) * 4)); + return; + } + + if (IPH_PROTO(iph) != IP_PROTO_ICMP) { + DPRINTF2(("%s: unexpected protocol %d\n", __func__, IPH_PROTO(iph))); + return; + } + + iplen = IPH_LEN(iph); +#if !defined(RT_OS_DARWIN) + /* darwin reports IPH_LEN in host byte order */ + iplen = ntohs(iplen); +#endif +#if defined(RT_OS_DARWIN) || defined(RT_OS_SOLARIS) + /* darwin and solaris change IPH_LEN to payload length only */ + iplen += IP_HLEN; /* we verified there are no options */ + IPH_LEN(iph) = htons(iplen); +#endif + if (nread < iplen) { + DPRINTF2(("%s: read %d bytes but total length is %d bytes\n", + __func__, (unsigned int)nread, (unsigned int)iplen)); + return; + } + + if (iplen < IP_HLEN + ICMP_HLEN) { + DPRINTF2(("%s: IP length %d bytes, ICMP header truncated\n", + __func__, iplen)); + return; + } + + icmph = (struct icmp_echo_hdr *)(pollmgr_udpbuf + IP_HLEN); + if (ICMPH_TYPE(icmph) == ICMP_ER) { + pxping_pmgr_icmp4_echo(pxping, iplen, &sin); + } + else if (ICMPH_TYPE(icmph) == ICMP_DUR || ICMPH_TYPE(icmph) == ICMP_TE) { + pxping_pmgr_icmp4_error(pxping, iplen, &sin); + } +#if 1 + else { + DPRINTF2(("%s: ignoring ICMP type %d\n", __func__, ICMPH_TYPE(icmph))); + } +#endif +} + + +/** + * 
Check if this incoming ICMP echo reply is for one of our pings and + * forward it to the guest. + */ +static void +pxping_pmgr_icmp4_echo(struct pxping *pxping, + u16_t iplen, struct sockaddr_in *peer) +{ + struct ip_hdr *iph; + struct icmp_echo_hdr *icmph; + u16_t id, seq; + ip_addr_t guest_ip, target_ip; + int mapped; + struct ping_pcb *pcb; + u16_t guest_id; + u32_t sum; + + iph = (struct ip_hdr *)pollmgr_udpbuf; + icmph = (struct icmp_echo_hdr *)(pollmgr_udpbuf + IP_HLEN); + + id = icmph->id; + seq = icmph->seqno; + + { + char addrbuf[sizeof "255.255.255.255"]; + const char *addrstr; + + addrstr = inet_ntop(AF_INET, &peer->sin_addr, addrbuf, sizeof(addrbuf)); + DPRINTF(("<--- PING %s id 0x%x seq %d\n", + addrstr, ntohs(id), ntohs(seq))); + } + + + /* + * Is this a reply to one of our pings? + */ + + ip_addr_copy(target_ip, iph->src); + mapped = pxremap_inbound_ip4(&target_ip, &target_ip); + if (mapped == PXREMAP_FAILED) { + return; + } + if (mapped == PXREMAP_ASIS && IPH_TTL(iph) == 1) { + DPRINTF2(("%s: dropping packet with ttl 1\n", __func__)); + return; + } + + sys_mutex_lock(&pxping->lock); + pcb = pxping_pcb_for_reply(pxping, 0, ip_2_ipX(&target_ip), id); + if (pcb == NULL) { + sys_mutex_unlock(&pxping->lock); + DPRINTF2(("%s: no match\n", __func__)); + return; + } + + DPRINTF2(("%s: pcb %p\n", __func__, (void *)pcb)); + + /* save info before unlocking since pcb may expire */ + ip_addr_copy(guest_ip, *ipX_2_ip(&pcb->src)); + guest_id = pcb->guest_id; + + sys_mutex_unlock(&pxping->lock); + + + /* + * Rewrite headers and forward to guest. 
+ */ + + /* rewrite ICMP echo header */ + sum = (u16_t)~icmph->chksum; + sum += chksum_update_16(&icmph->id, guest_id); + sum = FOLD_U32T(sum); + icmph->chksum = ~sum; + + /* rewrite IP header */ + sum = (u16_t)~IPH_CHKSUM(iph); + sum += chksum_update_32((u32_t *)&iph->dest, + ip4_addr_get_u32(&guest_ip)); + if (mapped == PXREMAP_MAPPED) { + sum += chksum_update_32((u32_t *)&iph->src, + ip4_addr_get_u32(&target_ip)); + } + else { + IPH_TTL_SET(iph, IPH_TTL(iph) - 1); + sum += PP_NTOHS(~0x0100); + } + sum = FOLD_U32T(sum); + IPH_CHKSUM_SET(iph, ~sum); + + pxping_pmgr_forward_inbound(pxping, iplen); +} + + +/** + * Check if this incoming ICMP error (destination unreachable or time + * exceeded) is about one of our pings and forward it to the guest. + */ +static void +pxping_pmgr_icmp4_error(struct pxping *pxping, + u16_t iplen, struct sockaddr_in *peer) +{ + struct ip_hdr *iph, *oiph; + struct icmp_echo_hdr *icmph, *oicmph; + u16_t oipoff, oiphlen, oiplen; + u16_t id, seq; + ip_addr_t guest_ip, target_ip, error_ip; + int target_mapped, error_mapped; + struct ping_pcb *pcb; + u16_t guest_id; + u32_t sum; + + iph = (struct ip_hdr *)pollmgr_udpbuf; + icmph = (struct icmp_echo_hdr *)(pollmgr_udpbuf + IP_HLEN); + + /* + * Inner IP datagram is not checked by the kernel and may be + * anything, possibly malicious. 
+ */ + + oipoff = IP_HLEN + ICMP_HLEN; + oiplen = iplen - oipoff; /* NB: truncated length, not IPH_LEN(oiph) */ + if (oiplen < IP_HLEN) { + DPRINTF2(("%s: original datagram truncated to %d bytes\n", + __func__, oiplen)); + } + + /* IP header of the original message */ + oiph = (struct ip_hdr *)(pollmgr_udpbuf + oipoff); + + /* match version */ + if (IPH_V(oiph) != 4) { + DPRINTF2(("%s: unexpected IP version %d\n", __func__, IPH_V(oiph))); + return; + } + + /* can't match fragments except the first one */ + if ((IPH_OFFSET(oiph) & PP_HTONS(IP_OFFMASK)) != 0) { + DPRINTF2(("%s: ignoring fragment with offset %d\n", + __func__, ntohs(IPH_OFFSET(oiph) & PP_HTONS(IP_OFFMASK)))); + return; + } + + if (IPH_PROTO(oiph) != IP_PROTO_ICMP) { +#if 0 + /* don't spam with every "destination unreachable" in the system */ + DPRINTF2(("%s: ignoring protocol %d\n", __func__, IPH_PROTO(oiph))); +#endif + return; + } + + oiphlen = IPH_HL(oiph) * 4; + if (oiplen < oiphlen + ICMP_HLEN) { + DPRINTF2(("%s: original datagram truncated to %d bytes\n", + __func__, oiplen)); + return; + } + + oicmph = (struct icmp_echo_hdr *)(pollmgr_udpbuf + oipoff + oiphlen); + if (ICMPH_TYPE(oicmph) != ICMP_ECHO) { + DPRINTF2(("%s: ignoring ICMP error for original ICMP type %d\n", + __func__, ICMPH_TYPE(oicmph))); + return; + } + + id = oicmph->id; + seq = oicmph->seqno; + + { + char addrbuf[sizeof "255.255.255.255"]; + const char *addrstr; + + addrstr = inet_ntop(AF_INET, &oiph->dest, addrbuf, sizeof(addrbuf)); + DPRINTF2(("%s: ping %s id 0x%x seq %d", + __func__, addrstr, ntohs(id), ntohs(seq))); + if (ICMPH_TYPE(icmph) == ICMP_DUR) { + DPRINTF2((" unreachable (code %d)\n", ICMPH_CODE(icmph))); + } + else { + DPRINTF2((" time exceeded\n")); + } + } + + + /* + * Is the inner (failed) datagram one of our pings? 
+ */ + + ip_addr_copy(target_ip, oiph->dest); /* inner (failed) */ + target_mapped = pxremap_inbound_ip4(&target_ip, &target_ip); + if (target_mapped == PXREMAP_FAILED) { + return; + } + + sys_mutex_lock(&pxping->lock); + pcb = pxping_pcb_for_reply(pxping, 0, ip_2_ipX(&target_ip), id); + if (pcb == NULL) { + sys_mutex_unlock(&pxping->lock); + DPRINTF2(("%s: no match\n", __func__)); + return; + } + + DPRINTF2(("%s: pcb %p\n", __func__, (void *)pcb)); + + /* save info before unlocking since pcb may expire */ + ip_addr_copy(guest_ip, *ipX_2_ip(&pcb->src)); + guest_id = pcb->guest_id; + + sys_mutex_unlock(&pxping->lock); + + + /* + * Rewrite both inner and outer headers and forward to guest. + * Note that the checksum of the outer ICMP error message is + * preserved by the changes we do to inner headers. + */ + + ip_addr_copy(error_ip, iph->src); /* node that reports the error */ + error_mapped = pxremap_inbound_ip4(&error_ip, &error_ip); + if (error_mapped == PXREMAP_FAILED) { + return; + } + if (error_mapped == PXREMAP_ASIS && IPH_TTL(iph) == 1) { + DPRINTF2(("%s: dropping packet with ttl 1\n", __func__)); + return; + } + + /* rewrite inner ICMP echo header */ + sum = (u16_t)~oicmph->chksum; + sum += chksum_update_16(&oicmph->id, guest_id); + sum = FOLD_U32T(sum); + oicmph->chksum = ~sum; + + /* rewrite inner IP header */ + sum = (u16_t)~IPH_CHKSUM(oiph); + sum += chksum_update_32((u32_t *)&oiph->src, ip4_addr_get_u32(&guest_ip)); + if (target_mapped == PXREMAP_MAPPED) { + sum += chksum_update_32((u32_t *)&oiph->dest, ip4_addr_get_u32(&target_ip)); + } + sum = FOLD_U32T(sum); + IPH_CHKSUM_SET(oiph, ~sum); + + /* rewrite outer IP header */ + sum = (u16_t)~IPH_CHKSUM(iph); + sum += chksum_update_32((u32_t *)&iph->dest, ip4_addr_get_u32(&guest_ip)); + if (error_mapped == PXREMAP_MAPPED) { + sum += chksum_update_32((u32_t *)&iph->src, ip4_addr_get_u32(&error_ip)); + } + else { + IPH_TTL_SET(iph, IPH_TTL(iph) - 1); + sum += PP_NTOHS(~0x0100); + } + sum = FOLD_U32T(sum); + 
IPH_CHKSUM_SET(iph, ~sum); + + pxping_pmgr_forward_inbound(pxping, iplen); +} + + +/** + * Process incoming ICMPv6 message for the host. + * NB: we will get a lot of spam here and have to sift through it. + */ +static void +pxping_pmgr_icmp6(struct pxping *pxping) +{ +#ifndef RT_OS_WINDOWS + struct msghdr mh; + ssize_t nread; +#else + WSAMSG mh; + DWORD nread; +#endif + IOVEC iov[1]; + static u8_t cmsgbuf[128]; + struct cmsghdr *cmh; + struct sockaddr_in6 sin6; + socklen_t salen = sizeof(sin6); + struct icmp6_echo_hdr *icmph; + struct in6_pktinfo *pktinfo; + int hopl, tclass; + int status; + + char addrbuf[sizeof "ffff:ffff:ffff:ffff:ffff:ffff:255.255.255.255"]; + const char *addrstr; + + /* + * Reads from raw IPv6 sockets deliver only the payload. Full + * headers are available via recvmsg(2)/cmsg(3). + */ + IOVEC_SET_BASE(iov[0], pollmgr_udpbuf); + IOVEC_SET_LEN(iov[0], sizeof(pollmgr_udpbuf)); + + memset(&mh, 0, sizeof(mh)); +#ifndef RT_OS_WINDOWS + mh.msg_name = &sin6; + mh.msg_namelen = sizeof(sin6); + mh.msg_iov = iov; + mh.msg_iovlen = 1; + mh.msg_control = cmsgbuf; + mh.msg_controllen = sizeof(cmsgbuf); + mh.msg_flags = 0; + + nread = recvmsg(pxping->sock6, &mh, 0); + if (nread < 0) { + perror(__func__); + return; + } +#else /* RT_OS_WINDOWS */ + mh.name = (LPSOCKADDR)&sin6; + mh.namelen = sizeof(sin6); + mh.lpBuffers = iov; + mh.dwBufferCount = 1; + mh.Control.buf = cmsgbuf; + mh.Control.len = sizeof(cmsgbuf); + mh.dwFlags = 0; + + status = (*pxping->pfWSARecvMsg6)(pxping->sock6, &mh, &nread, NULL, NULL); + if (status == SOCKET_ERROR) { + DPRINTF2(("%s: error %d\n", __func__, WSAGetLastError())); + return; + } +#endif + + icmph = (struct icmp6_echo_hdr *)pollmgr_udpbuf; + + addrstr = inet_ntop(AF_INET6, (void *)&sin6.sin6_addr, + addrbuf, sizeof(addrbuf)); + DPRINTF2(("%s: %s ICMPv6: ", __func__, addrstr)); + + if (icmph->type == ICMP6_TYPE_EREP) { + DPRINTF2(("echo reply %04x %u\n", + (unsigned int)icmph->id, (unsigned int)icmph->seqno)); + } + else { /* 
XXX */ + if (icmph->type == ICMP6_TYPE_EREQ) { + DPRINTF2(("echo request %04x %u\n", + (unsigned int)icmph->id, (unsigned int)icmph->seqno)); + } + else if (icmph->type == ICMP6_TYPE_DUR) { + DPRINTF2(("destination unreachable\n")); + } + else if (icmph->type == ICMP6_TYPE_PTB) { + DPRINTF2(("packet too big\n")); + } + else if (icmph->type == ICMP6_TYPE_TE) { + DPRINTF2(("time exceeded\n")); + } + else if (icmph->type == ICMP6_TYPE_PP) { + DPRINTF2(("parameter problem\n")); + } + else { + DPRINTF2(("type %d len %u\n", icmph->type, (unsigned int)nread)); + } + + if (icmph->type >= ICMP6_TYPE_EREQ) { + return; /* informational message */ + } + } + + pktinfo = NULL; + hopl = -1; + tclass = -1; + for (cmh = CMSG_FIRSTHDR(&mh); cmh != NULL; cmh = CMSG_NXTHDR(&mh, cmh)) { + if (cmh->cmsg_len == 0) + break; + + if (cmh->cmsg_level == IPPROTO_IPV6 + && cmh->cmsg_type == IPV6_HOPLIMIT + && cmh->cmsg_len == CMSG_LEN(sizeof(int))) + { + hopl = *(int *)CMSG_DATA(cmh); + DPRINTF2(("hoplimit = %d\n", hopl)); + } + + if (cmh->cmsg_level == IPPROTO_IPV6 + && cmh->cmsg_type == IPV6_PKTINFO + && cmh->cmsg_len == CMSG_LEN(sizeof(struct in6_pktinfo))) + { + pktinfo = (struct in6_pktinfo *)CMSG_DATA(cmh); + DPRINTF2(("pktinfo found\n")); + } + } + + if (pktinfo == NULL) { + /* + * ip6_output_if() doesn't do checksum for us so we need to + * manually recompute it - for this we must know the + * destination address of the pseudo-header that we will + * rewrite with guest's address. (TODO: yeah, yeah, we can + * compute it from scratch...) 
+ */ + DPRINTF2(("%s: unable to get pktinfo\n", __func__)); + return; + } + + if (hopl < 0) { + hopl = LWIP_ICMP6_HL; + } + + if (icmph->type == ICMP6_TYPE_EREP) { + pxping_pmgr_icmp6_echo(pxping, + (ip6_addr_t *)&sin6.sin6_addr, + (ip6_addr_t *)&pktinfo->ipi6_addr, + hopl, tclass, (u16_t)nread); + } + else if (icmph->type < ICMP6_TYPE_EREQ) { + pxping_pmgr_icmp6_error(pxping, + (ip6_addr_t *)&sin6.sin6_addr, + (ip6_addr_t *)&pktinfo->ipi6_addr, + hopl, tclass, (u16_t)nread); + } +} + + +/** + * Check if this incoming ICMPv6 echo reply is for one of our pings + * and forward it to the guest. + */ +static void +pxping_pmgr_icmp6_echo(struct pxping *pxping, + ip6_addr_t *src, ip6_addr_t *dst, + int hopl, int tclass, u16_t icmplen) +{ + struct icmp6_echo_hdr *icmph; + ip6_addr_t guest_ip, target_ip; + int mapped; + struct ping_pcb *pcb; + u16_t id, guest_id; + u32_t sum; + + ip6_addr_copy(target_ip, *src); + mapped = pxremap_inbound_ip6(&target_ip, &target_ip); + if (mapped == PXREMAP_FAILED) { + return; + } + else if (mapped == PXREMAP_ASIS) { + if (hopl == 1) { + DPRINTF2(("%s: dropping packet with ttl 1\n", __func__)); + return; + } + --hopl; + } + + icmph = (struct icmp6_echo_hdr *)pollmgr_udpbuf; + id = icmph->id; + + sys_mutex_lock(&pxping->lock); + pcb = pxping_pcb_for_reply(pxping, 1, ip6_2_ipX(&target_ip), id); + if (pcb == NULL) { + sys_mutex_unlock(&pxping->lock); + DPRINTF2(("%s: no match\n", __func__)); + return; + } + + DPRINTF2(("%s: pcb %p\n", __func__, (void *)pcb)); + + /* save info before unlocking since pcb may expire */ + ip6_addr_copy(guest_ip, *ipX_2_ip6(&pcb->src)); + guest_id = pcb->guest_id; + + sys_mutex_unlock(&pxping->lock); + + /* rewrite ICMPv6 echo header */ + sum = (u16_t)~icmph->chksum; + sum += chksum_update_16(&icmph->id, guest_id); + sum += chksum_delta_ipv6(dst, &guest_ip); /* pseudo */ + if (mapped) { + sum += chksum_delta_ipv6(src, &target_ip); /* pseudo */ + } + sum = FOLD_U32T(sum); + icmph->chksum = ~sum; + + 
pxping_pmgr_forward_inbound6(pxping, + &target_ip, /* echo reply src */ + &guest_ip, /* echo reply dst */ + hopl, tclass, icmplen); +} + + +/** + * Check if this incoming ICMPv6 error is about one of our pings and + * forward it to the guest. + */ +static void +pxping_pmgr_icmp6_error(struct pxping *pxping, + ip6_addr_t *src, ip6_addr_t *dst, + int hopl, int tclass, u16_t icmplen) +{ + struct icmp6_hdr *icmph; + u8_t *bufptr; + size_t buflen, hlen; + int proto; + struct ip6_hdr *oiph; + struct icmp6_echo_hdr *oicmph; + struct ping_pcb *pcb; + ip6_addr_t guest_ip, target_ip, error_ip; + int target_mapped, error_mapped; + u16_t guest_id; + u32_t sum; + + icmph = (struct icmp6_hdr *)pollmgr_udpbuf; + + /* + * Inner IP datagram is not checked by the kernel and may be + * anything, possibly malicious. + */ + oiph = NULL; + oicmph = NULL; + + bufptr = pollmgr_udpbuf; + buflen = icmplen; + + hlen = sizeof(*icmph); + proto = IP6_NEXTH_ENCAPS; /* i.e. IPv6, lwIP's name is unfortuate */ + for (;;) { + if (hlen > buflen) { + DPRINTF2(("truncated datagram inside ICMPv6 error message is too short\n")); + return; + } + buflen -= hlen; + bufptr += hlen; + + if (proto == IP6_NEXTH_ENCAPS && oiph == NULL) { /* outermost IPv6 */ + oiph = (struct ip6_hdr *)bufptr; + if (IP6H_V(oiph) != 6) { + DPRINTF2(("%s: unexpected IP version %d\n", __func__, IP6H_V(oiph))); + return; + } + + proto = IP6H_NEXTH(oiph); + hlen = IP6_HLEN; + } + else if (proto == IP6_NEXTH_ICMP6) { + oicmph = (struct icmp6_echo_hdr *)bufptr; + break; + } + else if (proto == IP6_NEXTH_ROUTING + || proto == IP6_NEXTH_HOPBYHOP + || proto == IP6_NEXTH_DESTOPTS) + { + proto = bufptr[0]; + hlen = (bufptr[1] + 1) * 8; + } + else { + DPRINTF2(("%s: stopping at protocol %d\n", __func__, proto)); + break; + } + } + + if (oiph == NULL || oicmph == NULL) { + return; + } + + if (buflen < sizeof(*oicmph)) { + DPRINTF2(("%s: original ICMPv6 is truncated too short\n", __func__)); + return; + } + + if (oicmph->type != 
ICMP6_TYPE_EREQ) { + DPRINTF2(("%s: ignoring original ICMPv6 type %d\n", __func__, oicmph->type)); + return; + } + + memcpy(&target_ip, &oiph->dest, sizeof(target_ip)); /* inner (failed) */ + target_mapped = pxremap_inbound_ip6(&target_ip, &target_ip); + if (target_mapped == PXREMAP_FAILED) { + return; + } + + sys_mutex_lock(&pxping->lock); + pcb = pxping_pcb_for_reply(pxping, 1, ip_2_ipX(&target_ip), oicmph->id); + if (pcb == NULL) { + sys_mutex_unlock(&pxping->lock); + DPRINTF2(("%s: no match\n", __func__)); + return; + } + + DPRINTF2(("%s: pcb %p\n", __func__, (void *)pcb)); + + /* save info before unlocking since pcb may expire */ + ip6_addr_copy(guest_ip, *ipX_2_ip6(&pcb->src)); + guest_id = pcb->guest_id; + + sys_mutex_unlock(&pxping->lock); + + + /* + * Rewrite inner and outer headers and forward to guest. Note + * that IPv6 has no IP header checksum, but uses pseudo-header for + * ICMPv6, so we update both in one go, adjusting ICMPv6 checksum + * as we rewrite IP header. + */ + + ip6_addr_copy(error_ip, *src); /* node that reports the error */ + error_mapped = pxremap_inbound_ip6(&error_ip, &error_ip); + if (error_mapped == PXREMAP_FAILED) { + return; + } + if (error_mapped == PXREMAP_ASIS && hopl == 1) { + DPRINTF2(("%s: dropping packet with ttl 1\n", __func__)); + return; + } + + /* rewrite inner ICMPv6 echo header and inner IPv6 header */ + sum = (u16_t)~oicmph->chksum; + sum += chksum_update_16(&oicmph->id, guest_id); + sum += chksum_update_ipv6((ip6_addr_t *)&oiph->src, &guest_ip); + if (target_mapped) { + sum += chksum_delta_ipv6((ip6_addr_t *)&oiph->dest, &target_ip); + } + sum = FOLD_U32T(sum); + oicmph->chksum = ~sum; + + /* rewrite outer ICMPv6 error header */ + sum = (u16_t)~icmph->chksum; + sum += chksum_delta_ipv6(dst, &guest_ip); /* pseudo */ + if (error_mapped) { + sum += chksum_delta_ipv6(src, &error_ip); /* pseudo */ + } + sum = FOLD_U32T(sum); + icmph->chksum = ~sum; + + pxping_pmgr_forward_inbound6(pxping, + &error_ip, /* error src */ + 
&guest_ip, /* error dst */ + hopl, tclass, icmplen); +} + + +/** + * Hand off ICMP datagram to the lwip thread where it will be + * forwarded to the guest. + * + * We no longer need ping_pcb. The pcb may get expired on the lwip + * thread, but we have already patched necessary information into the + * datagram. + */ +static void +pxping_pmgr_forward_inbound(struct pxping *pxping, u16_t iplen) +{ + struct pbuf *p; + struct ping_msg *msg; + err_t error; + + p = pbuf_alloc(PBUF_LINK, iplen, PBUF_RAM); + if (p == NULL) { + DPRINTF(("%s: pbuf_alloc(%d) failed\n", + __func__, (unsigned int)iplen)); + return; + } + + error = pbuf_take(p, pollmgr_udpbuf, iplen); + if (error != ERR_OK) { + DPRINTF(("%s: pbuf_take(%d) failed\n", + __func__, (unsigned int)iplen)); + pbuf_free(p); + return; + } + + msg = (struct ping_msg *)malloc(sizeof(*msg)); + if (msg == NULL) { + pbuf_free(p); + return; + } + + msg->msg.type = TCPIP_MSG_CALLBACK_STATIC; + msg->msg.sem = NULL; + msg->msg.msg.cb.function = pxping_pcb_forward_inbound; + msg->msg.msg.cb.ctx = (void *)msg; + + msg->pxping = pxping; + msg->p = p; + + proxy_lwip_post(&msg->msg); +} + + +static void +pxping_pcb_forward_inbound(void *arg) +{ + struct ping_msg *msg = (struct ping_msg *)arg; + err_t error; + + LWIP_ASSERT1(msg != NULL); + LWIP_ASSERT1(msg->pxping != NULL); + LWIP_ASSERT1(msg->p != NULL); + + error = ip_raw_output_if(msg->p, msg->pxping->netif); + if (error != ERR_OK) { + DPRINTF(("%s: ip_output_if: %s\n", + __func__, proxy_lwip_strerr(error))); + } + pbuf_free(msg->p); + free(msg); +} + + +static void +pxping_pmgr_forward_inbound6(struct pxping *pxping, + ip6_addr_t *src, ip6_addr_t *dst, + u8_t hopl, u8_t tclass, + u16_t icmplen) +{ + struct pbuf *p; + struct ping6_msg *msg; + + err_t error; + + p = pbuf_alloc(PBUF_IP, icmplen, PBUF_RAM); + if (p == NULL) { + DPRINTF(("%s: pbuf_alloc(%d) failed\n", + __func__, (unsigned int)icmplen)); + return; + } + + error = pbuf_take(p, pollmgr_udpbuf, icmplen); + if (error != 
ERR_OK) { + DPRINTF(("%s: pbuf_take(%d) failed\n", + __func__, (unsigned int)icmplen)); + pbuf_free(p); + return; + } + + msg = (struct ping6_msg *)malloc(sizeof(*msg)); + if (msg == NULL) { + pbuf_free(p); + return; + } + + msg->msg.type = TCPIP_MSG_CALLBACK_STATIC; + msg->msg.sem = NULL; + msg->msg.msg.cb.function = pxping_pcb_forward_inbound6; + msg->msg.msg.cb.ctx = (void *)msg; + + msg->pxping = pxping; + msg->p = p; + ip6_addr_copy(msg->src, *src); + ip6_addr_copy(msg->dst, *dst); + msg->hopl = hopl; + msg->tclass = tclass; + + proxy_lwip_post(&msg->msg); +} + + +static void +pxping_pcb_forward_inbound6(void *arg) +{ + struct ping6_msg *msg = (struct ping6_msg *)arg; + err_t error; + + LWIP_ASSERT1(msg != NULL); + LWIP_ASSERT1(msg->pxping != NULL); + LWIP_ASSERT1(msg->p != NULL); + + error = ip6_output_if(msg->p, + &msg->src, &msg->dst, msg->hopl, msg->tclass, + IP6_NEXTH_ICMP6, msg->pxping->netif); + if (error != ERR_OK) { + DPRINTF(("%s: ip6_output_if: %s\n", + __func__, proxy_lwip_strerr(error))); + } + pbuf_free(msg->p); + free(msg); +} diff --git a/src/VBox/NetworkServices/NAT/pxping_win.c b/src/VBox/NetworkServices/NAT/pxping_win.c new file mode 100644 index 00000000..58eb9c87 --- /dev/null +++ b/src/VBox/NetworkServices/NAT/pxping_win.c @@ -0,0 +1,629 @@ +/* -*- indent-tabs-mode: nil; -*- */ + +#include "winutils.h" +#include "proxy.h" +#include "pxremap.h" + +#include "lwip/ip.h" +#include "lwip/icmp.h" +#include "lwip/inet_chksum.h" + +/* XXX: lwIP names conflict with winsock <iphlpapi.h> */ +#undef IP_STATS +#undef ICMP_STATS +#undef TCP_STATS +#undef UDP_STATS +#undef IP6_STATS + +#include <winternl.h> /* for PIO_APC_ROUTINE &c */ +#include <iphlpapi.h> +#include <icmpapi.h> + +#include <stdio.h> + + +struct pxping { + /* + * We use single ICMP handle for all pings. This means that all + * proxied pings will have the same id and share single sequence + * of sequence numbers. 
+ */ + HANDLE hdl4; + HANDLE hdl6; + + struct netif *netif; + + /* + * On Windows XP and Windows Server 2003 IcmpSendEcho2() callback + * is FARPROC, but starting from Vista it's PIO_APC_ROUTINE with + * two extra arguments. Callbacks use WINAPI (stdcall) calling + * convention with callee responsible for popping the arguments, + * so to avoid stack corruption we check windows version at run + * time and provide correct callback. + */ + void *callback4; + void *callback6; +}; + + +struct pong4 { + struct netif *netif; + + struct ip_hdr reqiph; + struct icmp_echo_hdr reqicmph; + + size_t bufsize; + u8_t buf[1]; +}; + + +struct pong6 { + struct netif *netif; + + ip6_addr_t reqsrc; + struct icmp6_echo_hdr reqicmph; + size_t reqsize; + + size_t bufsize; + u8_t buf[1]; +}; + + +static void pxping_recv4(void *arg, struct pbuf *p); +static void pxping_recv6(void *arg, struct pbuf *p); + +static VOID WINAPI pxping_icmp4_callback_old(void *); +static VOID WINAPI pxping_icmp4_callback_apc(void *, PIO_STATUS_BLOCK, ULONG); +static void pxping_icmp4_callback(struct pong4 *pong); + +static VOID WINAPI pxping_icmp6_callback_old(void *); +static VOID WINAPI pxping_icmp6_callback_apc(void *, PIO_STATUS_BLOCK, ULONG); +static void pxping_icmp6_callback(struct pong6 *pong); + + +struct pxping g_pxping; + + +err_t +pxping_init(struct netif *netif, SOCKET sock4, SOCKET sock6) +{ + OSVERSIONINFO osvi; + int status; + + LWIP_UNUSED_ARG(sock4); + LWIP_UNUSED_ARG(sock6); + + ZeroMemory(&osvi, sizeof(OSVERSIONINFO)); + osvi.dwOSVersionInfoSize = sizeof(OSVERSIONINFO); + status = GetVersionEx(&osvi); + if (status == 0) { + return ERR_ARG; + } + + if (osvi.dwMajorVersion >= 6) { + g_pxping.callback4 = (void *)pxping_icmp4_callback_apc; + g_pxping.callback6 = (void *)pxping_icmp6_callback_apc; + } + else { + g_pxping.callback4 = (void *)pxping_icmp4_callback_old; + g_pxping.callback6 = (void *)pxping_icmp6_callback_old; + } + + + g_pxping.hdl4 = IcmpCreateFile(); + if (g_pxping.hdl4 != 
INVALID_HANDLE_VALUE) { + ping_proxy_accept(pxping_recv4, &g_pxping); + } + else { + DPRINTF(("IcmpCreateFile: error %d\n", GetLastError())); + } + + g_pxping.hdl6 = Icmp6CreateFile(); + if (g_pxping.hdl6 != INVALID_HANDLE_VALUE) { + ping6_proxy_accept(pxping_recv6, &g_pxping); + } + else { + DPRINTF(("Icmp6CreateFile: error %d\n", GetLastError())); + } + + if (g_pxping.hdl4 == INVALID_HANDLE_VALUE + && g_pxping.hdl6 == INVALID_HANDLE_VALUE) + { + return ERR_ARG; + } + + g_pxping.netif = netif; + + return ERR_OK; +} + + +/** + * ICMP Echo Request in pbuf "p" is to be proxied. + */ +static void +pxping_recv4(void *arg, struct pbuf *p) +{ + struct pxping *pxping = (struct pxping *)arg; + const struct ip_hdr *iph; + const struct icmp_echo_hdr *icmph; + u16_t iphlen; + size_t bufsize; + struct pong4 *pong; + IPAddr dst; + int mapped; + int ttl; + IP_OPTION_INFORMATION opts; + void *reqdata; + size_t reqsize; + int status; + + pong = NULL; + + iphlen = ip_current_header_tot_len(); + if (RT_UNLIKELY(iphlen != IP_HLEN)) { /* we don't do options */ + goto out; + } + + iph = (const struct ip_hdr *)ip_current_header(); + icmph = (const struct icmp_echo_hdr *)p->payload; + + mapped = pxremap_outbound_ip4((ip_addr_t *)&dst, (ip_addr_t *)&iph->dest); + if (RT_UNLIKELY(mapped == PXREMAP_FAILED)) { + goto out; + } + + ttl = IPH_TTL(iph); + if (mapped == PXREMAP_ASIS) { + if (RT_UNLIKELY(ttl == 1)) { + status = pbuf_header(p, iphlen); /* back to IP header */ + if (RT_LIKELY(status == 0)) { + icmp_time_exceeded(p, ICMP_TE_TTL); + } + goto out; + } + --ttl; + } + + status = pbuf_header(p, -(u16_t)sizeof(*icmph)); /* to ping payload */ + if (RT_UNLIKELY(status != 0)) { + goto out; + } + + bufsize = sizeof(ICMP_ECHO_REPLY) + p->tot_len; + pong = (struct pong4 *)malloc(sizeof(*pong) - sizeof(pong->buf) + bufsize); + if (RT_UNLIKELY(pong == NULL)) { + goto out; + } + pong->bufsize = bufsize; + pong->netif = pxping->netif; + + memcpy(&pong->reqiph, iph, sizeof(*iph)); + 
memcpy(&pong->reqicmph, icmph, sizeof(*icmph)); + + reqsize = p->tot_len; + if (p->next == NULL) { + /* single pbuf can be directly used as request data source */ + reqdata = p->payload; + } + else { + /* data from pbuf chain must be concatenated */ + pbuf_copy_partial(p, pong->buf, p->tot_len, 0); + reqdata = pong->buf; + } + + opts.Ttl = ttl; + opts.Tos = IPH_TOS(iph); /* affected by DisableUserTOSSetting key */ + opts.Flags = (IPH_OFFSET(iph) & PP_HTONS(IP_DF)) != 0 ? IP_FLAG_DF : 0; + opts.OptionsSize = 0; + opts.OptionsData = 0; + + status = IcmpSendEcho2(pxping->hdl4, NULL, + pxping->callback4, pong, + dst, reqdata, (WORD)reqsize, &opts, + pong->buf, (DWORD)pong->bufsize, + 5 * 1000 /* ms */); + + if (RT_UNLIKELY(status != 0)) { + DPRINTF(("IcmpSendEcho2: unexpected status %d\n", status)); + goto out; + } + else if ((status = GetLastError()) != ERROR_IO_PENDING) { + int code; + + DPRINTF(("IcmpSendEcho2: error %d\n", status)); + switch (status) { + case ERROR_NETWORK_UNREACHABLE: + code = ICMP_DUR_NET; + break; + case ERROR_HOST_UNREACHABLE: + code = ICMP_DUR_HOST; + break; + default: + code = -1; + break; + } + + if (code != -1) { + /* move payload back to IP header */ + status = pbuf_header(p, (u16_t)(sizeof(*icmph) + iphlen)); + if (RT_LIKELY(status == 0)) { + icmp_dest_unreach(p, code); + } + } + goto out; + } + + pong = NULL; /* callback owns it now */ + out: + if (pong != NULL) { + free(pong); + } + pbuf_free(p); +} + + +static VOID WINAPI +pxping_icmp4_callback_apc(void *ctx, PIO_STATUS_BLOCK iob, ULONG reserved) +{ + struct pong4 *pong = (struct pong4 *)ctx; + LWIP_UNUSED_ARG(iob); + LWIP_UNUSED_ARG(reserved); + + if (pong != NULL) { + pxping_icmp4_callback(pong); + free(pong); + } +} + + +static VOID WINAPI +pxping_icmp4_callback_old(void *ctx) +{ + struct pong4 *pong = (struct pong4 *)ctx; + + if (pong != NULL) { + pxping_icmp4_callback(pong); + free(pong); + } +} + + +static void +pxping_icmp4_callback(struct pong4 *pong) +{ + ICMP_ECHO_REPLY 
*reply; + DWORD nreplies; + size_t icmplen; + struct pbuf *p; + struct icmp_echo_hdr *icmph; + ip_addr_t src; + int mapped; + + nreplies = IcmpParseReplies(pong->buf, (DWORD)pong->bufsize); + if (nreplies <= 0) { + DWORD error = GetLastError(); + if (error == IP_REQ_TIMED_OUT) { + DPRINTF2(("pong4: %p timed out\n", (void *)pong)); + } + else { + DPRINTF(("pong4: %p: IcmpParseReplies: error %d\n", + (void *)pong, error)); + } + return; + } + + reply = (ICMP_ECHO_REPLY *)pong->buf; + + if (reply->Options.OptionsSize != 0) { /* don't do options */ + return; + } + + mapped = pxremap_inbound_ip4(&src, (ip_addr_t *)&reply->Address); + if (mapped == PXREMAP_FAILED) { + return; + } + if (mapped == PXREMAP_ASIS) { + if (reply->Options.Ttl == 1) { + return; + } + --reply->Options.Ttl; + } + + if (reply->Status == IP_SUCCESS) { + icmplen = sizeof(struct icmp_echo_hdr) + reply->DataSize; + if ((reply->Options.Flags & IP_FLAG_DF) != 0 + && IP_HLEN + icmplen > pong->netif->mtu) + { + return; + } + + p = pbuf_alloc(PBUF_IP, (u16_t)icmplen, PBUF_RAM); + if (RT_UNLIKELY(p == NULL)) { + return; + } + + icmph = (struct icmp_echo_hdr *)p->payload; + icmph->type = ICMP_ER; + icmph->code = 0; + icmph->chksum = 0; + icmph->id = pong->reqicmph.id; + icmph->seqno = pong->reqicmph.seqno; + + memcpy((u8_t *)p->payload + sizeof(*icmph), + reply->Data, reply->DataSize); + } + else { + u8_t type, code; + + switch (reply->Status) { + case IP_DEST_NET_UNREACHABLE: + type = ICMP_DUR; code = ICMP_DUR_NET; + break; + case IP_DEST_HOST_UNREACHABLE: + type = ICMP_DUR; code = ICMP_DUR_HOST; + break; + case IP_DEST_PROT_UNREACHABLE: + type = ICMP_DUR; code = ICMP_DUR_PROTO; + break; + case IP_PACKET_TOO_BIG: + type = ICMP_DUR; code = ICMP_DUR_FRAG; + break; + case IP_SOURCE_QUENCH: + type = ICMP_SQ; code = 0; + break; + case IP_TTL_EXPIRED_TRANSIT: + type = ICMP_TE; code = ICMP_TE_TTL; + break; + case IP_TTL_EXPIRED_REASSEM: + type = ICMP_TE; code = ICMP_TE_FRAG; + break; + default: + DPRINTF(("pong4: 
reply status %d, dropped\n", reply->Status)); + return; + } + + DPRINTF(("pong4: reply status %d -> type %d/code %d\n", + reply->Status, type, code)); + + icmplen = sizeof(*icmph) + sizeof(pong->reqiph) + sizeof(pong->reqicmph); + + p = pbuf_alloc(PBUF_IP, (u16_t)icmplen, PBUF_RAM); + if (RT_UNLIKELY(p == NULL)) { + return; + } + + icmph = (struct icmp_echo_hdr *)p->payload; + icmph->type = type; + icmph->code = code; + icmph->chksum = 0; + icmph->id = 0; + icmph->seqno = 0; + + /* + * XXX: we don't know the TTL of the request at the time this + * ICMP error was generated (we can guess it was 1 for ttl + * exceeded, but don't bother faking it). + */ + memcpy((u8_t *)p->payload + sizeof(*icmph), + &pong->reqiph, sizeof(pong->reqiph)); + + memcpy((u8_t *)p->payload + sizeof(*icmph) + sizeof(pong->reqiph), + &pong->reqicmph, sizeof(pong->reqicmph)); + } + + icmph->chksum = inet_chksum(p->payload, (u16_t)icmplen); + ip_output_if(p, &src, + (ip_addr_t *)&pong->reqiph.src, /* dst */ + reply->Options.Ttl, + reply->Options.Tos, + IPPROTO_ICMP, + pong->netif); + pbuf_free(p); +} + + +static void +pxping_recv6(void *arg, struct pbuf *p) +{ + struct pxping *pxping = (struct pxping *)arg; + struct icmp6_echo_hdr *icmph; + size_t bufsize; + struct pong6 *pong; + int mapped; + void *reqdata; + size_t reqsize; + struct sockaddr_in6 src, dst; + int hopl; + IP_OPTION_INFORMATION opts; + int status; + + pong = NULL; + + icmph = (struct icmp6_echo_hdr *)p->payload; + + memset(&dst, 0, sizeof(dst)); + dst.sin6_family = AF_INET6; + mapped = pxremap_outbound_ip6((ip6_addr_t *)&dst.sin6_addr, + ip6_current_dest_addr()); + if (RT_UNLIKELY(mapped == PXREMAP_FAILED)) { + goto out; + } + + hopl = IP6H_HOPLIM(ip6_current_header()); + if (mapped == PXREMAP_ASIS) { + if (RT_UNLIKELY(hopl == 1)) { + status = pbuf_header(p, ip_current_header_tot_len()); + if (RT_LIKELY(status == 0)) { + icmp6_time_exceeded(p, ICMP6_TE_HL); + } + goto out; + } + --hopl; + } + + status = pbuf_header(p, 
-(u16_t)sizeof(*icmph)); /* to ping payload */ + if (RT_UNLIKELY(status != 0)) { + goto out; + } + + bufsize = sizeof(ICMPV6_ECHO_REPLY) + p->tot_len; + pong = (struct pong6 *)malloc(sizeof(*pong) - sizeof(pong->buf) + bufsize); + if (RT_UNLIKELY(pong == NULL)) { + goto out; + } + pong->bufsize = bufsize; + pong->netif = pxping->netif; + + ip6_addr_copy(pong->reqsrc, *ip6_current_src_addr()); + memcpy(&pong->reqicmph, icmph, sizeof(*icmph)); + + memset(pong->buf, 0xa5, pong->bufsize); + + pong->reqsize = reqsize = p->tot_len; + if (p->next == NULL) { + /* single pbuf can be directly used as request data source */ + reqdata = p->payload; + } + else { + /* data from pbuf chain must be concatenated */ + pbuf_copy_partial(p, pong->buf, p->tot_len, 0); + reqdata = pong->buf; + } + + memset(&src, 0, sizeof(src)); + src.sin6_family = AF_INET6; + src.sin6_addr = in6addr_any; /* let the OS select host source address */ + + memset(&opts, 0, sizeof(opts)); + opts.Ttl = hopl; + + status = Icmp6SendEcho2(pxping->hdl6, NULL, + pxping->callback6, pong, + &src, &dst, reqdata, (WORD)reqsize, &opts, + pong->buf, (DWORD)pong->bufsize, + 5 * 1000 /* ms */); + + if (RT_UNLIKELY(status != 0)) { + DPRINTF(("Icmp6SendEcho2: unexpected status %d\n", status)); + goto out; + } + else if ((status = GetLastError()) != ERROR_IO_PENDING) { + int code; + + DPRINTF(("Icmp6SendEcho2: error %d\n", status)); + switch (status) { + case ERROR_NETWORK_UNREACHABLE: + case ERROR_HOST_UNREACHABLE: + code = ICMP6_DUR_NO_ROUTE; + break; + default: + code = -1; + break; + } + + if (code != -1) { + /* move payload back to IP header */ + status = pbuf_header(p, (u16_t)(sizeof(*icmph) + + ip_current_header_tot_len())); + if (RT_LIKELY(status == 0)) { + icmp6_dest_unreach(p, code); + } + } + goto out; + } + + pong = NULL; /* callback owns it now */ + out: + if (pong != NULL) { + free(pong); + } + pbuf_free(p); +} + + +static VOID WINAPI +pxping_icmp6_callback_apc(void *ctx, PIO_STATUS_BLOCK iob, ULONG reserved) 
+{ + struct pong6 *pong = (struct pong6 *)ctx; + LWIP_UNUSED_ARG(iob); + LWIP_UNUSED_ARG(reserved); + + if (pong != NULL) { + pxping_icmp6_callback(pong); + free(pong); + } +} + + +static VOID WINAPI +pxping_icmp6_callback_old(void *ctx) +{ + struct pong6 *pong = (struct pong6 *)ctx; + + if (pong != NULL) { + pxping_icmp6_callback(pong); + free(pong); + } +} + + +static void +pxping_icmp6_callback(struct pong6 *pong) +{ + DWORD nreplies; + ICMPV6_ECHO_REPLY *reply; + struct pbuf *p; + struct icmp6_echo_hdr *icmph; + size_t icmplen; + ip6_addr_t src; + int mapped; + + nreplies = Icmp6ParseReplies(pong->buf, (DWORD)pong->bufsize); + if (nreplies <= 0) { + DWORD error = GetLastError(); + if (error == IP_REQ_TIMED_OUT) { + DPRINTF2(("pong6: %p timed out\n", (void *)pong)); + } + else { + DPRINTF(("pong6: %p: Icmp6ParseReplies: error %d\n", + (void *)pong, error)); + } + return; + } + + reply = (ICMPV6_ECHO_REPLY *)pong->buf; + + mapped = pxremap_inbound_ip6(&src, (ip6_addr_t *)reply->Address.sin6_addr); + if (mapped == PXREMAP_FAILED) { + return; + } + + /* + * Reply data follows ICMPV6_ECHO_REPLY structure in memory, but + * it doesn't tell us its size. Assume it's equal the size of the + * request. 
+ */ + icmplen = sizeof(*icmph) + pong->reqsize; + p = pbuf_alloc(PBUF_IP, (u16_t)icmplen, PBUF_RAM); + if (RT_UNLIKELY(p == NULL)) { + return; + } + + icmph = (struct icmp6_echo_hdr *)p->payload; + icmph->type = ICMP6_TYPE_EREP; + icmph->code = 0; + icmph->chksum = 0; + icmph->id = pong->reqicmph.id; + icmph->seqno = pong->reqicmph.seqno; + + memcpy((u8_t *)p->payload + sizeof(*icmph), + pong->buf + sizeof(*reply), pong->reqsize); + + icmph->chksum = ip6_chksum_pseudo(p, IP6_NEXTH_ICMP6, p->tot_len, + &src, &pong->reqsrc); + ip6_output_if(p, /* :src */ &src, /* :dst */ &pong->reqsrc, + LWIP_ICMP6_HL, 0, IP6_NEXTH_ICMP6, + pong->netif); + pbuf_free(p); +} diff --git a/src/VBox/NetworkServices/NAT/pxremap.c b/src/VBox/NetworkServices/NAT/pxremap.c new file mode 100644 index 00000000..616bad9f --- /dev/null +++ b/src/VBox/NetworkServices/NAT/pxremap.c @@ -0,0 +1,305 @@ +/** -*- indent-tabs-mode: nil; -*- + * + * This file contains functions pertinent to magic address remapping. + * + * We want to expose host's loopback interfaces to the guest by + * mapping them to the addresses from the same prefix/subnet, so if, + * for example proxy interface is 10.0.2.1, we redirect traffic to + * 10.0.2.2 to host's 127.0.0.1 loopback. If need be, we may extend + * this to provide additional mappings, e.g. 127.0.1.1 loopback + * address is used on Ubuntu 12.10+ for NetworkManager's dnsmasq. + * + * Ditto for IPv6, except that IPv6 only has one loopback address. + */ +#include "winutils.h" +#include "pxremap.h" +#include "proxy.h" + +#include "lwip/netif.h" +#include "netif/etharp.h" /* proxy arp hook */ + +#include "lwip/ip4.h" /* IPv4 divert hook */ +#include "lwip/ip6.h" /* IPv6 divert hook */ + +#include <string.h> + + +/** + * Check if "dst" is an IPv4 address that proxy remaps to host's + * loopback. 
+ */ +static int +proxy_ip4_is_mapped_loopback(struct netif *netif, const ip_addr_t *dst, ip_addr_t *lo) +{ + u32_t off; + const struct ip4_lomap *lomap; + size_t i; + + LWIP_ASSERT1(dst != NULL); + + if (g_proxy_options->lomap_desc == NULL) { + return 0; + } + + if (!ip_addr_netcmp(dst, &netif->ip_addr, &netif->netmask)) { + return 0; + } + + /* XXX: TODO: check netif is a proxying netif! */ + + off = ntohl(ip4_addr_get_u32(dst) & ~ip4_addr_get_u32(&netif->netmask)); + lomap = g_proxy_options->lomap_desc->lomap; + for (i = 0; i < g_proxy_options->lomap_desc->num_lomap; ++i) { + if (off == lomap[i].off) { + if (lo != NULL) { + ip_addr_copy(*lo, lomap[i].loaddr); + } + return 1; + } + } + return 0; +} + + +#if ARP_PROXY +/** + * Hook function for etharp_arp_input() - returns true to cause proxy + * ARP reply to be generated for "dst". + */ +int +pxremap_proxy_arp(struct netif *netif, ip_addr_t *dst) +{ + return proxy_ip4_is_mapped_loopback(netif, dst, NULL); +} +#endif /* ARP_PROXY */ + + +/** + * Hook function for ip_forward() - returns true to divert packets to + * "dst" to proxy (instead of forwarding them via "netif" or dropping). + */ +int +pxremap_ip4_divert(struct netif *netif, ip_addr_t *dst) +{ + return proxy_ip4_is_mapped_loopback(netif, dst, NULL); +} + + +/** + * Mapping from local network to loopback for outbound connections. + * + * Copy "src" to "dst" with ip_addr_set(dst, src), but if "src" is a + * local network address that maps host's loopback address, copy + * loopback address to "dst". 
+ */ +int +pxremap_outbound_ip4(ip_addr_t *dst, ip_addr_t *src) +{ + struct netif *netif; + + LWIP_ASSERT1(dst != NULL); + LWIP_ASSERT1(src != NULL); + + for (netif = netif_list; netif != NULL; netif = netif->next) { + if (netif_is_up(netif) /* && this is a proxy netif */) { + if (proxy_ip4_is_mapped_loopback(netif, src, dst)) { + return PXREMAP_MAPPED; + } + } + } + + /* not remapped, just copy src */ + ip_addr_set(dst, src); + return PXREMAP_ASIS; +} + + +/** + * Mapping from loopback to local network for inbound (port-forwarded) + * connections. + * + * Copy "src" to "dst" with ip_addr_set(dst, src), but if "src" is a + * host's loopback address, copy local network address that maps it to + * "dst". + */ +int +pxremap_inbound_ip4(ip_addr_t *dst, ip_addr_t *src) +{ + struct netif *netif; + const struct ip4_lomap *lomap; + unsigned int i; + + if (ip4_addr1(src) != IP_LOOPBACKNET) { + ip_addr_set(dst, src); + return PXREMAP_ASIS; + } + + if (g_proxy_options->lomap_desc == NULL) { + return PXREMAP_FAILED; + } + +#if 0 /* ?TODO: with multiple interfaces we need to consider fwspec::dst */ + netif = ip_route(target); + if (netif == NULL) { + return PXREMAP_FAILED; + } +#else + netif = netif_list; + LWIP_ASSERT1(netif != NULL); + LWIP_ASSERT1(netif->next == NULL); +#endif + + lomap = g_proxy_options->lomap_desc->lomap; + for (i = 0; i < g_proxy_options->lomap_desc->num_lomap; ++i) { + if (ip_addr_cmp(src, &lomap[i].loaddr)) { + ip_addr_t net; + + ip_addr_get_network(&net, &netif->ip_addr, &netif->netmask); + ip4_addr_set_u32(dst, + htonl(ntohl(ip4_addr_get_u32(&net)) + + lomap[i].off)); + return PXREMAP_MAPPED; + } + } + + return PXREMAP_FAILED; +} + + +static int +proxy_ip6_is_mapped_loopback(struct netif *netif, ip6_addr_t *dst) +{ + int i; + + /* XXX: TODO: check netif is a proxying netif! 
*/ + + LWIP_ASSERT1(dst != NULL); + + for (i = 0; i < LWIP_IPV6_NUM_ADDRESSES; ++i) { + if (ip6_addr_ispreferred(netif_ip6_addr_state(netif, i)) + && ip6_addr_isuniquelocal(netif_ip6_addr(netif, i))) + { + ip6_addr_t *ifaddr = netif_ip6_addr(netif, i); + if (memcmp(dst, ifaddr, sizeof(ip6_addr_t) - 1) == 0 + && ((IP6_ADDR_BLOCK8(dst) & 0xff) + == (IP6_ADDR_BLOCK8(ifaddr) & 0xff) + 1)) + { + return 1; + } + } + } + + return 0; +} + + +/** + * Hook function for nd6_input() - returns true to cause proxy NA + * reply to be generated for "dst". + */ +int +pxremap_proxy_na(struct netif *netif, ip6_addr_t *dst) +{ + return proxy_ip6_is_mapped_loopback(netif, dst); +} + + +/** + * Hook function for ip6_forward() - returns true to divert packets to + * "dst" to proxy (instead of forwarding them via "netif" or dropping). + */ +int +pxremap_ip6_divert(struct netif *netif, ip6_addr_t *dst) +{ + return proxy_ip6_is_mapped_loopback(netif, dst); +} + + +/** + * Mapping from local network to loopback for outbound connections. + * + * Copy "src" to "dst" with ip6_addr_set(dst, src), but if "src" is a + * local network address that maps host's loopback address, copy IPv6 + * loopback address to "dst". 
+ */ +int +pxremap_outbound_ip6(ip6_addr_t *dst, ip6_addr_t *src) +{ + struct netif *netif; + int i; + + LWIP_ASSERT1(dst != NULL); + LWIP_ASSERT1(src != NULL); + + for (netif = netif_list; netif != NULL; netif = netif->next) { + if (!netif_is_up(netif) /* || this is not a proxy netif */) { + continue; + } + + for (i = 0; i < LWIP_IPV6_NUM_ADDRESSES; ++i) { + if (ip6_addr_ispreferred(netif_ip6_addr_state(netif, i)) + && ip6_addr_isuniquelocal(netif_ip6_addr(netif, i))) + { + ip6_addr_t *ifaddr = netif_ip6_addr(netif, i); + if (memcmp(src, ifaddr, sizeof(ip6_addr_t) - 1) == 0 + && ((IP6_ADDR_BLOCK8(src) & 0xff) + == (IP6_ADDR_BLOCK8(ifaddr) & 0xff) + 1)) + { + ip6_addr_set_loopback(dst); + return PXREMAP_MAPPED; + } + } + } + } + + /* not remapped, just copy src */ + ip6_addr_set(dst, src); + return PXREMAP_ASIS; +} + + +/** + * Mapping from loopback to local network for inbound (port-forwarded) + * connections. + * + * Copy "src" to "dst" with ip6_addr_set(dst, src), but if "src" is a + * host's loopback address, copy local network address that maps it to + * "dst". 
+ */ +int +pxremap_inbound_ip6(ip6_addr_t *dst, ip6_addr_t *src) +{ + ip6_addr_t loopback; + struct netif *netif; + int i; + + ip6_addr_set_loopback(&loopback); + if (!ip6_addr_cmp(src, &loopback)) { + ip6_addr_set(dst, src); + return PXREMAP_ASIS; + } + +#if 0 /* ?TODO: with multiple interfaces we need to consider fwspec::dst */ + netif = ip6_route_fwd(target); + if (netif == NULL) { + return PXREMAP_FAILED; + } +#else + netif = netif_list; + LWIP_ASSERT1(netif != NULL); + LWIP_ASSERT1(netif->next == NULL); +#endif + + for (i = 0; i < LWIP_IPV6_NUM_ADDRESSES; ++i) { + ip6_addr_t *ifaddr = netif_ip6_addr(netif, i); + if (ip6_addr_ispreferred(netif_ip6_addr_state(netif, i)) + && ip6_addr_isuniquelocal(ifaddr)) + { + ip6_addr_set(dst, ifaddr); + ++((u8_t *)&dst->addr[3])[3]; + return PXREMAP_MAPPED; + } + } + + return PXREMAP_FAILED; +} diff --git a/src/VBox/NetworkServices/NAT/pxremap.h b/src/VBox/NetworkServices/NAT/pxremap.h new file mode 100644 index 00000000..6c9b61e8 --- /dev/null +++ b/src/VBox/NetworkServices/NAT/pxremap.h @@ -0,0 +1,33 @@ +/* -*- indent-tabs-mode: nil; -*- */ +#ifndef _pxremap_h_ +#define _pxremap_h_ + +#include "lwip/err.h" +#include "lwip/ip_addr.h" + +struct netif; + + +#define PXREMAP_FAILED (-1) +#define PXREMAP_ASIS 0 +#define PXREMAP_MAPPED 1 + +/* IPv4 */ +#if ARP_PROXY +int pxremap_proxy_arp(struct netif *netif, ip_addr_t *dst); +#endif +int pxremap_ip4_divert(struct netif *netif, ip_addr_t *dst); +int pxremap_outbound_ip4(ip_addr_t *dst, ip_addr_t *src); +int pxremap_inbound_ip4(ip_addr_t *dst, ip_addr_t *src); + +/* IPv6 */ +int pxremap_proxy_na(struct netif *netif, ip6_addr_t *dst); +int pxremap_ip6_divert(struct netif *netif, ip6_addr_t *dst); +int pxremap_outbound_ip6(ip6_addr_t *dst, ip6_addr_t *src); +int pxremap_inbound_ip6(ip6_addr_t *dst, ip6_addr_t *src); + +#define pxremap_outbound_ipX(is_ipv6, dst, src) \ + ((is_ipv6) ? 
pxremap_outbound_ip6(&(dst)->ip6, &(src)->ip6) \ + : pxremap_outbound_ip4(&(dst)->ip4, &(src)->ip4)) + +#endif /* _pxremap_h_ */ diff --git a/src/VBox/NetworkServices/NAT/pxtcp.c b/src/VBox/NetworkServices/NAT/pxtcp.c new file mode 100644 index 00000000..8bdc5581 --- /dev/null +++ b/src/VBox/NetworkServices/NAT/pxtcp.c @@ -0,0 +1,2392 @@ +/* -*- indent-tabs-mode: nil; -*- */ +#include "winutils.h" + +#include "pxtcp.h" + +#include "proxy.h" +#include "proxy_pollmgr.h" +#include "pxremap.h" +#include "portfwd.h" /* fwspec */ + +#ifndef RT_OS_WINDOWS +#include <sys/types.h> +#include <sys/socket.h> +#include <sys/ioctl.h> +#ifdef RT_OS_SOLARIS +#include <sys/filio.h> /* FIONREAD is BSD'ism */ +#endif +#include <stdlib.h> +#include <stdint.h> +#include <stdio.h> +#include <string.h> +#include <poll.h> + +#include <err.h> /* BSD'ism */ +#else +#include <stdlib.h> +#include <stdio.h> +#include <string.h> + +#include <iprt/stdint.h> +#include "winpoll.h" +#endif + +#include "lwip/opt.h" + +#include "lwip/sys.h" +#include "lwip/tcpip.h" +#include "lwip/netif.h" +#include "lwip/tcp_impl.h" /* XXX: to access tcp_abandon() */ +#include "lwip/icmp.h" +#include "lwip/icmp6.h" + +/* NetBSD doesn't report POLLHUP for TCP sockets */ +#ifdef __NetBSD__ +# define HAVE_TCP_POLLHUP 0 +#else +# define HAVE_TCP_POLLHUP 1 +#endif + + +/** + * Ring buffer for inbound data. Filled with data from the host + * socket on poll manager thread. Data consumed by scheduling + * tcp_write() to the pcb on the lwip thread. + * + * NB: There is actually a third party present, the lwip stack itself. + * Thus the buffer doesn't have a dual free vs. data split, but rather + * a three-way free / sent and unACKed data / unsent data split. + */ +struct ringbuf { + char *buf; + size_t bufsize; + + /* + * Start of free space, producer writes here (up till "unacked"). + */ + volatile size_t vacant; + + /* + * Start of sent but unacknowledged data. The data are "owned" by + * the stack as it may need to retransmit.
This is the free space + * limit for producer. + */ + volatile size_t unacked; + + /* + * Start of unsent data, consumer reads/sends from here (up till + * "vacant"). Not declared volatile since it's only accessed from + * the consumer thread. + */ + size_t unsent; +}; + + +/** + * State of one proxied TCP connection: the lwIP pcb on the guest + * side, the host socket on the external side and the bookkeeping + * needed to forward data between the two. + */ +struct pxtcp { + /** + * Our poll manager handler. Must be first, strong/weak + * references depend on this "inheritance". + */ + struct pollmgr_handler pmhdl; + + /** + * lwIP (internal/guest) side of the proxied connection. + */ + struct tcp_pcb *pcb; + + /** + * Host (external) side of the proxied connection. + */ + SOCKET sock; + + /** + * Socket events we are currently polling for. + */ + int events; + + /** + * Socket error. Currently used to save connect(2) errors so that + * we can decide if we need to send ICMP error. + */ + int sockerr; + + /** + * Interface that we have got the SYN from. Needed to send ICMP + * with correct source address. + */ + struct netif *netif; + + /** + * For tentatively accepted connections for which we are in + * process of connecting to the real destination this is the + * initial pbuf that we might need to build ICMP error. + * + * When connection is established this is used to hold outbound + * pbuf chain received by pxtcp_pcb_recv() but not yet completely + * forwarded over the socket. We cannot "return" it to lwIP since + * the head of the chain is already sent and freed. + */ + struct pbuf *unsent; + + /** + * Guest has closed its side. Reported to pxtcp_pcb_recv() only + * once and we might not be able to forward it immediately if we + * have unsent pbuf. + */ + int outbound_close; + + /** + * Outbound half-close has been done on the socket. + */ + int outbound_close_done; + + /** + * External has closed its side. We might not be able to forward + * it immediately if we have unforwarded data. + */ + int inbound_close; + + /** + * Inbound half-close has been done on the pcb.
+ */ + int inbound_close_done; + + /** + * On systems that report POLLHUP as soon as the final FIN is + * received on a socket we cannot continue polling for the rest of + * input, so we have to read (pull) last data from the socket on + * the lwIP thread instead of polling/pushing it from the poll + * manager thread. See comment in pxtcp_pmgr_pump() POLLHUP case. + */ + int inbound_pull; + + + /** + * When poll manager schedules delete we may not be able to delete + * a pxtcp immediately if not all inbound data has been acked by + * the guest: lwIP may need to resend and the data are in pxtcp's + * inbuf::buf. We defer delete until all data are acked to + * pxtcp_pcb_sent(). + * + * It's also implied by inbound_pull. It probably means that + * "deferred" is not a very fortunate name. + */ + int deferred_delete; + + /** + * Ring-buffer for inbound data. + */ + struct ringbuf inbuf; + + /** + * lwIP thread's strong reference to us. + */ + struct pollmgr_refptr *rp; + + + /* + * We use static messages to call functions on the lwIP thread to + * avoid malloc/free overhead.
+ */ + struct tcpip_msg msg_delete; /* delete pxtcp */ + struct tcpip_msg msg_reset; /* reset connection and delete pxtcp */ + struct tcpip_msg msg_accept; /* confirm (or refuse) accept of proxied connection */ + struct tcpip_msg msg_outbound; /* trigger send of outbound data */ + struct tcpip_msg msg_inbound; /* trigger send of inbound data */ + struct tcpip_msg msg_inpull; /* trigger pull of last inbound data */ +}; + + + +static struct pxtcp *pxtcp_allocate(void); +static void pxtcp_free(struct pxtcp *); + +static void pxtcp_pcb_associate(struct pxtcp *, struct tcp_pcb *); +static void pxtcp_pcb_dissociate(struct pxtcp *); + +/* poll manager callbacks for pxtcp related channels */ +static int pxtcp_pmgr_chan_add(struct pollmgr_handler *, SOCKET, int); +static int pxtcp_pmgr_chan_pollout(struct pollmgr_handler *, SOCKET, int); +static int pxtcp_pmgr_chan_pollin(struct pollmgr_handler *, SOCKET, int); +#if !HAVE_TCP_POLLHUP +static int pxtcp_pmgr_chan_del(struct pollmgr_handler *, SOCKET, int); +#endif +static int pxtcp_pmgr_chan_reset(struct pollmgr_handler *, SOCKET, int); + +/* helper functions for sending/receiving pxtcp over poll manager channels */ +static ssize_t pxtcp_chan_send(enum pollmgr_slot_t, struct pxtcp *); +static ssize_t pxtcp_chan_send_weak(enum pollmgr_slot_t, struct pxtcp *); +static struct pxtcp *pxtcp_chan_recv(struct pollmgr_handler *, SOCKET, int); +static struct pxtcp *pxtcp_chan_recv_strong(struct pollmgr_handler *, SOCKET, int); + +/* poll manager callbacks for individual sockets */ +static int pxtcp_pmgr_connect(struct pollmgr_handler *, SOCKET, int); +static int pxtcp_pmgr_pump(struct pollmgr_handler *, SOCKET, int); + +static ssize_t pxtcp_sock_read(struct pxtcp *, int *); + +/* convenience functions for poll manager callbacks */ +static int pxtcp_schedule_delete(struct pxtcp *); +static int pxtcp_schedule_reset(struct pxtcp *); +static int pxtcp_schedule_reject(struct pxtcp *); + +/* lwip thread callbacks called via proxy_lwip_post() */ +static
void pxtcp_pcb_delete_pxtcp(void *); +static void pxtcp_pcb_reset_pxtcp(void *); +static void pxtcp_pcb_accept_refuse(void *); +static void pxtcp_pcb_accept_confirm(void *); +static void pxtcp_pcb_write_outbound(void *); +static void pxtcp_pcb_write_inbound(void *); +static void pxtcp_pcb_pull_inbound(void *); + +/* tcp pcb callbacks */ +static err_t pxtcp_pcb_heard(void *, struct tcp_pcb *, err_t); /* global */ +static err_t pxtcp_pcb_accept(void *, struct tcp_pcb *, err_t); +static err_t pxtcp_pcb_connected(void *, struct tcp_pcb *, err_t); +static err_t pxtcp_pcb_recv(void *, struct tcp_pcb *, struct pbuf *, err_t); +static err_t pxtcp_pcb_sent(void *, struct tcp_pcb *, u16_t); +static err_t pxtcp_pcb_poll(void *, struct tcp_pcb *); +static void pxtcp_pcb_err(void *, err_t); + +static err_t pxtcp_pcb_forward_outbound(struct pxtcp *, struct pbuf *); +static void pxtcp_pcb_forward_outbound_close(struct pxtcp *); + +static void pxtcp_pcb_forward_inbound(struct pxtcp *); +static void pxtcp_pcb_forward_inbound_close(struct pxtcp *); +DECLINLINE(int) pxtcp_pcb_forward_inbound_done(const struct pxtcp *); +static void pxtcp_pcb_schedule_poll(struct pxtcp *); +static void pxtcp_pcb_cancel_poll(struct pxtcp *); + +static void pxtcp_pcb_reject(struct netif *, struct tcp_pcb *, struct pbuf *, int); +DECLINLINE(void) pxtcp_pcb_maybe_deferred_delete(struct pxtcp *); + +/* poll manager handlers for pxtcp channels */ +static struct pollmgr_handler pxtcp_pmgr_chan_add_hdl; +static struct pollmgr_handler pxtcp_pmgr_chan_pollout_hdl; +static struct pollmgr_handler pxtcp_pmgr_chan_pollin_hdl; +#if !HAVE_TCP_POLLHUP +static struct pollmgr_handler pxtcp_pmgr_chan_del_hdl; +#endif +static struct pollmgr_handler pxtcp_pmgr_chan_reset_hdl; + + +/** + * Init PXTCP - must be run before either the lwIP tcpip thread or the + * poll manager threads have been created. + * + * Registers the pxtcp channel handlers with the poll manager and + * installs pxtcp_pcb_heard() as the proxy accept callback. + */ +void +pxtcp_init(void) +{ + /* + * Create channels.
+ */ +#define CHANNEL(SLOT, NAME) do { \ + NAME##_hdl.callback = NAME; \ + NAME##_hdl.data = NULL; \ + NAME##_hdl.slot = -1; \ + pollmgr_add_chan(SLOT, &NAME##_hdl); \ + } while (0) + + CHANNEL(POLLMGR_CHAN_PXTCP_ADD, pxtcp_pmgr_chan_add); + CHANNEL(POLLMGR_CHAN_PXTCP_POLLIN, pxtcp_pmgr_chan_pollin); + CHANNEL(POLLMGR_CHAN_PXTCP_POLLOUT, pxtcp_pmgr_chan_pollout); +#if !HAVE_TCP_POLLHUP + CHANNEL(POLLMGR_CHAN_PXTCP_DEL, pxtcp_pmgr_chan_del); +#endif + CHANNEL(POLLMGR_CHAN_PXTCP_RESET, pxtcp_pmgr_chan_reset); + +#undef CHANNEL + + /* + * Listen to outgoing connections from guest(s). + */ + tcp_proxy_accept(pxtcp_pcb_heard); +} + + +/** + * Syntactic sugar for sending pxtcp pointer over poll manager + * channel. Used by lwip thread functions. + */ +static ssize_t +pxtcp_chan_send(enum pollmgr_slot_t slot, struct pxtcp *pxtcp) +{ + return pollmgr_chan_send(slot, &pxtcp, sizeof(pxtcp)); +} + + +/** + * Syntactic sugar for sending weak reference to pxtcp over poll + * manager channel. Used by lwip thread functions. + * + * Takes a weak reference on pxtcp::rp before sending it. + */ +static ssize_t +pxtcp_chan_send_weak(enum pollmgr_slot_t slot, struct pxtcp *pxtcp) +{ + pollmgr_refptr_weak_ref(pxtcp->rp); + return pollmgr_chan_send(slot, &pxtcp->rp, sizeof(pxtcp->rp)); +} + + +/** + * Counterpart of pxtcp_chan_send(). + */ +static struct pxtcp * +pxtcp_chan_recv(struct pollmgr_handler *handler, SOCKET fd, int revents) +{ + struct pxtcp *pxtcp; + + pxtcp = (struct pxtcp *)pollmgr_chan_recv_ptr(handler, fd, revents); + return pxtcp; +} + + +/** + * Counterpart of pxtcp_chan_send_weak(). + * + * The result of pollmgr_refptr_get() may be NULL, the channel + * handlers that use this function check for that. + */ +static struct pxtcp * +pxtcp_chan_recv_strong(struct pollmgr_handler *handler, SOCKET fd, int revents) +{ + struct pollmgr_refptr *rp; + struct pollmgr_handler *base; + struct pxtcp *pxtcp; + + rp = (struct pollmgr_refptr *)pollmgr_chan_recv_ptr(handler, fd, revents); + base = (struct pollmgr_handler *)pollmgr_refptr_get(rp); + pxtcp = (struct pxtcp *)base; /* pmhdl is the first member of struct pxtcp */ + + return pxtcp; +} + + +/** + * Register pxtcp with poll manager.
+ * + * Used for POLLMGR_CHAN_PXTCP_ADD and by port-forwarding. Since + * error handling is different in these two cases, we leave it up to + * the caller. + */ +int +pxtcp_pmgr_add(struct pxtcp *pxtcp) +{ + int status; + + LWIP_ASSERT1(pxtcp != NULL); + LWIP_ASSERT1(pxtcp->sock >= 0); + LWIP_ASSERT1(pxtcp->pmhdl.callback != NULL); + LWIP_ASSERT1(pxtcp->pmhdl.data == (void *)pxtcp); + LWIP_ASSERT1(pxtcp->pmhdl.slot < 0); + + status = pollmgr_add(&pxtcp->pmhdl, pxtcp->sock, pxtcp->events); + return status; +} + + +/** + * Unregister pxtcp with poll manager. + * + * Used for POLLMGR_CHAN_PXTCP_RESET and by port-forwarding (on error + * leg). + */ +void +pxtcp_pmgr_del(struct pxtcp *pxtcp) +{ + LWIP_ASSERT1(pxtcp != NULL); + + pollmgr_del_slot(pxtcp->pmhdl.slot); +} + + +/** + * POLLMGR_CHAN_PXTCP_ADD handler. + * + * Get new pxtcp from lwip thread and start polling its socket. + */ +static int +pxtcp_pmgr_chan_add(struct pollmgr_handler *handler, SOCKET fd, int revents) +{ + struct pxtcp *pxtcp; + int status; + + pxtcp = pxtcp_chan_recv(handler, fd, revents); + DPRINTF0(("pxtcp_add: new pxtcp %p; pcb %p; sock %d\n", + (void *)pxtcp, (void *)pxtcp->pcb, pxtcp->sock)); + + status = pxtcp_pmgr_add(pxtcp); + if (status < 0) { + (void) pxtcp_schedule_reset(pxtcp); + } + + return POLLIN; +} + + +/** + * POLLMGR_CHAN_PXTCP_POLLOUT handler. + * + * pxtcp_pcb_forward_outbound() on the lwIP thread tried to send data + * and failed, it now requests us to poll the socket for POLLOUT and + * schedule pxtcp_pcb_forward_outbound() when sock is writable again. 
+ */ +static int +pxtcp_pmgr_chan_pollout(struct pollmgr_handler *handler, SOCKET fd, int revents) +{ + struct pxtcp *pxtcp; + + pxtcp = pxtcp_chan_recv_strong(handler, fd, revents); + DPRINTF0(("pxtcp_pollout: pxtcp %p\n", (void *)pxtcp)); + + /* the weak reference did not resolve to a pxtcp, nothing to do */ + if (pxtcp == NULL) { + return POLLIN; + } + + LWIP_ASSERT1(pxtcp->pmhdl.data == (void *)pxtcp); + LWIP_ASSERT1(pxtcp->pmhdl.slot > 0); + + pxtcp->events |= POLLOUT; + pollmgr_update_events(pxtcp->pmhdl.slot, pxtcp->events); + + return POLLIN; +} + + +/** + * POLLMGR_CHAN_PXTCP_POLLIN handler. + * + * The lwIP thread requests us to poll the socket for POLLIN. The + * request is ignored if the external side has already closed + * (pxtcp::inbound_close). + */ +static int +pxtcp_pmgr_chan_pollin(struct pollmgr_handler *handler, SOCKET fd, int revents) +{ + struct pxtcp *pxtcp; + + pxtcp = pxtcp_chan_recv_strong(handler, fd, revents); + DPRINTF2(("pxtcp_pollin: pxtcp %p\n", (void *)pxtcp)); + + /* the weak reference did not resolve to a pxtcp, nothing to do */ + if (pxtcp == NULL) { + return POLLIN; + } + + LWIP_ASSERT1(pxtcp->pmhdl.data == (void *)pxtcp); + LWIP_ASSERT1(pxtcp->pmhdl.slot > 0); + + /* external side has closed, there is nothing more to poll for */ + if (pxtcp->inbound_close) { + return POLLIN; + } + + pxtcp->events |= POLLIN; + pollmgr_update_events(pxtcp->pmhdl.slot, pxtcp->events); + + return POLLIN; +} + + +#if !HAVE_TCP_POLLHUP +/** + * POLLMGR_CHAN_PXTCP_DEL handler. + * + * Schedule pxtcp deletion. We only need this if host system doesn't + * report POLLHUP for fully closed tcp sockets. + */ +static int +pxtcp_pmgr_chan_del(struct pollmgr_handler *handler, SOCKET fd, int revents) +{ + struct pxtcp *pxtcp; + + pxtcp = pxtcp_chan_recv_strong(handler, fd, revents); + if (pxtcp == NULL) { + return POLLIN; + } + + DPRINTF(("PXTCP_DEL: pxtcp %p; pcb %p; sock %d\n", + (void *)pxtcp, (void *)pxtcp->pcb, pxtcp->sock)); + + LWIP_ASSERT1(pxtcp->pmhdl.callback != NULL); + LWIP_ASSERT1(pxtcp->pmhdl.data == (void *)pxtcp); + + LWIP_ASSERT1(pxtcp->inbound_close); /* EOF read */ + LWIP_ASSERT1(pxtcp->outbound_close_done); /* EOF sent */ + + /* unregister from the poll manager first, then hand over to the lwip thread */ + pxtcp_pmgr_del(pxtcp); + (void) pxtcp_schedule_delete(pxtcp); + + return POLLIN; +} +#endif /* !HAVE_TCP_POLLHUP */ + + +/** + * POLLMGR_CHAN_PXTCP_RESET handler.
+ * + * Close the socket with RST and delete pxtcp. + */ +static int +pxtcp_pmgr_chan_reset(struct pollmgr_handler *handler, SOCKET fd, int revents) +{ + struct pxtcp *pxtcp; + + pxtcp = pxtcp_chan_recv_strong(handler, fd, revents); + if (pxtcp == NULL) { + return POLLIN; + } + + DPRINTF0(("PXTCP_RESET: pxtcp %p; pcb %p; sock %d\n", + (void *)pxtcp, (void *)pxtcp->pcb, pxtcp->sock)); + + LWIP_ASSERT1(pxtcp->pmhdl.callback != NULL); + LWIP_ASSERT1(pxtcp->pmhdl.data == (void *)pxtcp); + + pxtcp_pmgr_del(pxtcp); + + proxy_reset_socket(pxtcp->sock); + pxtcp->sock = INVALID_SOCKET; + + (void) pxtcp_schedule_reset(pxtcp); + + return POLLIN; +} + + +/** + * Allocate a pxtcp together with its 64K inbound ring buffer and its + * poll manager refptr, and set up the static lwIP callback messages. + * The new pxtcp has no pcb, no socket and no poll manager callback + * yet. + * + * Returns NULL if any of the allocations fails. + */ +static struct pxtcp * +pxtcp_allocate(void) +{ + struct pxtcp *pxtcp; + + pxtcp = (struct pxtcp *)malloc(sizeof(*pxtcp)); + if (pxtcp == NULL) { + return NULL; + } + + pxtcp->pmhdl.callback = NULL; + pxtcp->pmhdl.data = (void *)pxtcp; + pxtcp->pmhdl.slot = -1; + + pxtcp->pcb = NULL; + pxtcp->sock = INVALID_SOCKET; + pxtcp->events = 0; + pxtcp->sockerr = 0; + pxtcp->netif = NULL; + pxtcp->unsent = NULL; + pxtcp->outbound_close = 0; + pxtcp->outbound_close_done = 0; + pxtcp->inbound_close = 0; + pxtcp->inbound_close_done = 0; + pxtcp->inbound_pull = 0; + pxtcp->deferred_delete = 0; + + pxtcp->inbuf.bufsize = 64 * 1024; + pxtcp->inbuf.buf = (char *)malloc(pxtcp->inbuf.bufsize); + if (pxtcp->inbuf.buf == NULL) { + free(pxtcp); + return NULL; + } + pxtcp->inbuf.vacant = 0; + pxtcp->inbuf.unacked = 0; + pxtcp->inbuf.unsent = 0; + + pxtcp->rp = pollmgr_refptr_create(&pxtcp->pmhdl); + if (pxtcp->rp == NULL) { + free(pxtcp->inbuf.buf); + free(pxtcp); + return NULL; + } + +#define CALLBACK_MSG(MSG, FUNC) \ + do { \ + pxtcp->MSG.type = TCPIP_MSG_CALLBACK_STATIC; \ + pxtcp->MSG.sem = NULL; \ + pxtcp->MSG.msg.cb.function = FUNC; \ + pxtcp->MSG.msg.cb.ctx = (void *)pxtcp; \ + } while (0) + + CALLBACK_MSG(msg_delete, pxtcp_pcb_delete_pxtcp); + CALLBACK_MSG(msg_reset, pxtcp_pcb_reset_pxtcp); + CALLBACK_MSG(msg_accept,
pxtcp_pcb_accept_confirm); + CALLBACK_MSG(msg_outbound, pxtcp_pcb_write_outbound); + CALLBACK_MSG(msg_inbound, pxtcp_pcb_write_inbound); + CALLBACK_MSG(msg_inpull, pxtcp_pcb_pull_inbound); + +#undef CALLBACK_MSG + + return pxtcp; +} + + +/** + * Exported to fwtcp to create pxtcp for incoming port-forwarded + * connections. Completed with pcb in pxtcp_pcb_connect(). + */ +struct pxtcp * +pxtcp_create_forwarded(SOCKET sock) +{ + struct pxtcp *pxtcp; + + pxtcp = pxtcp_allocate(); + if (pxtcp == NULL) { + return NULL; + } + + pxtcp->sock = sock; + pxtcp->pmhdl.callback = pxtcp_pmgr_pump; + pxtcp->events = 0; + + return pxtcp; +} + + +/** + * Attach pcb to pxtcp and install our recv/sent/err callbacks on it. + */ +static void +pxtcp_pcb_associate(struct pxtcp *pxtcp, struct tcp_pcb *pcb) +{ + LWIP_ASSERT1(pxtcp != NULL); + LWIP_ASSERT1(pcb != NULL); + + pxtcp->pcb = pcb; + + tcp_arg(pcb, pxtcp); + + tcp_recv(pcb, pxtcp_pcb_recv); + tcp_sent(pcb, pxtcp_pcb_sent); + tcp_poll(pcb, NULL, 255); + tcp_err(pcb, pxtcp_pcb_err); +} + + +/** + * Release pxtcp memory, including a still pending unsent pbuf and + * the inbound ring buffer. Does not touch the socket, the pcb or + * the refptr. + */ +static void +pxtcp_free(struct pxtcp *pxtcp) +{ + if (pxtcp->unsent != NULL) { + pbuf_free(pxtcp->unsent); + } + if (pxtcp->inbuf.buf != NULL) { + free(pxtcp->inbuf.buf); + } + free(pxtcp); +} + + +/** + * Counterpart to pxtcp_create_forwarded() to destruct pxtcp that + * fwtcp failed to register with poll manager to post to lwip thread + * for doing connect. + */ +void +pxtcp_cancel_forwarded(struct pxtcp *pxtcp) +{ + LWIP_ASSERT1(pxtcp->pcb == NULL); + pxtcp_pcb_reset_pxtcp(pxtcp); +} + + +/** + * Detach pxtcp from its pcb (if any): clear all callbacks and the + * callback argument on the pcb and forget the pcb pointer. + */ +static void +pxtcp_pcb_dissociate(struct pxtcp *pxtcp) +{ + if (pxtcp == NULL || pxtcp->pcb == NULL) { + return; + } + + DPRINTF(("%s: pxtcp %p <-> pcb %p\n", + __func__, (void *)pxtcp, (void *)pxtcp->pcb)); + + /* + * We must have dissociated from a fully closed pcb immediately + * since lwip recycles them and we don't want to mess with what + * would be someone else's pcb that we happen to have a stale + * pointer to.
+ */ + LWIP_ASSERT1(pxtcp->pcb->callback_arg == pxtcp); + + tcp_recv(pxtcp->pcb, NULL); + tcp_sent(pxtcp->pcb, NULL); + tcp_poll(pxtcp->pcb, NULL, 255); + tcp_err(pxtcp->pcb, NULL); + tcp_arg(pxtcp->pcb, NULL); + pxtcp->pcb = NULL; +} + + +/** + * Lwip thread callback invoked via pxtcp::msg_delete + * + * Since we use static messages to communicate to the lwip thread, we + * cannot delete pxtcp without making sure there are no unprocessed + * messages in the lwip thread mailbox. + * + * The easiest way to ensure that is to send this "delete" message as + * the last one and when it's processed we know there are no more and + * it's safe to delete pxtcp. + * + * Poll manager handlers should use pxtcp_schedule_delete() + * convenience function. + */ +static void +pxtcp_pcb_delete_pxtcp(void *ctx) +{ + struct pxtcp *pxtcp = (struct pxtcp *)ctx; + + /* check before the debug printout below dereferences pxtcp */ + LWIP_ASSERT1(pxtcp != NULL); + + DPRINTF(("%s: pxtcp %p, pcb %p, sock %d%s\n", + __func__, (void *)pxtcp, (void *)pxtcp->pcb, pxtcp->sock, + (pxtcp->deferred_delete && !pxtcp->inbound_pull + ? " (was deferred)" : ""))); + + LWIP_ASSERT1(pxtcp->pmhdl.slot < 0); + LWIP_ASSERT1(pxtcp->outbound_close_done); + LWIP_ASSERT1(pxtcp->inbound_close); /* not necessarily done */ + + + /* + * pxtcp is no longer registered with poll manager, so it's safe + * to close the socket. + */ + if (pxtcp->sock != INVALID_SOCKET) { + closesocket(pxtcp->sock); + pxtcp->sock = INVALID_SOCKET; + } + + /* + * We might have already dissociated from a fully closed pcb, or + * guest might have sent us a reset while msg_delete was in + * transit. If there's no pcb, we are done. + */ + if (pxtcp->pcb == NULL) { + pollmgr_refptr_unref(pxtcp->rp); + pxtcp_free(pxtcp); + return; + } + + /* + * Have we completely forwarded all inbound traffic to the guest? + * + * We may still be waiting for ACKs. We may have failed to send + * some of the data (tcp_write() failed with ERR_MEM). We may + * have failed to send the FIN (tcp_shutdown() failed with + * ERR_MEM).
+ */ + if (pxtcp_pcb_forward_inbound_done(pxtcp)) { + pxtcp_pcb_dissociate(pxtcp); + pollmgr_refptr_unref(pxtcp->rp); + pxtcp_free(pxtcp); + } + else { + DPRINTF2(("delete: pxtcp %p; pcb %p:" + " unacked %d, unsent %d, vacant %d, %s - DEFER!\n", + (void *)pxtcp, (void *)pxtcp->pcb, + (int)pxtcp->inbuf.unacked, + (int)pxtcp->inbuf.unsent, + (int)pxtcp->inbuf.vacant, + pxtcp->inbound_close_done ? "FIN sent" : "FIN is NOT sent")); + + LWIP_ASSERT1(!pxtcp->deferred_delete); + pxtcp->deferred_delete = 1; + } +} + + +/** + * If we couldn't delete pxtcp right away in the msg_delete callback + * from the poll manager thread, we repeat the check at the end of + * relevant pcb callbacks. + */ +DECLINLINE(void) +pxtcp_pcb_maybe_deferred_delete(struct pxtcp *pxtcp) +{ + if (pxtcp->deferred_delete && pxtcp_pcb_forward_inbound_done(pxtcp)) { + pxtcp_pcb_delete_pxtcp(pxtcp); + } +} + + +/** + * Poll manager callbacks should use this convenience wrapper to + * schedule pxtcp deletion on the lwip thread and to deregister from + * the poll manager. + */ +static int +pxtcp_schedule_delete(struct pxtcp *pxtcp) +{ + /* + * If pollmgr_refptr_get() is called by any channel before + * scheduled deletion happens, let them know we are gone. + */ + pxtcp->pmhdl.slot = -1; + + /* + * Schedule deletion. Since poll manager thread may be pre-empted + * right after we send the message, the deletion may actually + * happen on the lwip thread before we return from this function, + * so it's not safe to refer to pxtcp after this call. + */ + proxy_lwip_post(&pxtcp->msg_delete); + + /* tell poll manager to deregister us */ + return -1; +} + + +/** + * Lwip thread callback invoked via pxtcp::msg_reset + * + * Like pxtcp_pcb_delete_pxtcp(), but sends RST to the guest before + * deleting this pxtcp.
+ */ +static void +pxtcp_pcb_reset_pxtcp(void *ctx) +{ + struct pxtcp *pxtcp = (struct pxtcp *)ctx; + LWIP_ASSERT1(pxtcp != NULL); + + DPRINTF0(("%s: pxtcp %p, pcb %p, sock %d\n", + __func__, (void *)pxtcp, (void *)pxtcp->pcb, pxtcp->sock)); + + if (pxtcp->sock != INVALID_SOCKET) { + proxy_reset_socket(pxtcp->sock); + pxtcp->sock = INVALID_SOCKET; + } + + if (pxtcp->pcb != NULL) { + struct tcp_pcb *pcb = pxtcp->pcb; + /* detach our callbacks from the pcb before aborting it */ + pxtcp_pcb_dissociate(pxtcp); + tcp_abort(pcb); + } + + pollmgr_refptr_unref(pxtcp->rp); + pxtcp_free(pxtcp); +} + + + +/** + * Poll manager callbacks should use this convenience wrapper to + * schedule pxtcp reset and deletion on the lwip thread and to + * deregister from the poll manager. + * + * See pxtcp_schedule_delete() for additional comments. + */ +static int +pxtcp_schedule_reset(struct pxtcp *pxtcp) +{ + pxtcp->pmhdl.slot = -1; + proxy_lwip_post(&pxtcp->msg_reset); + return -1; +} + + +/** + * Reject proxy connection attempt. Depending on the cause (sockerr) + * we may just drop the pcb silently, generate an ICMP datagram or + * send TCP reset. + * + * netif is the interface the ICMP error is sent from, pcb is the + * connection being rejected (it is always abandoned), p is the + * datagram passed to the ICMP functions and sockerr is the errno + * style cause: ECONNREFUSED selects the TCP reset, the host/net + * down/unreachable errors select ICMP, anything else is a silent + * drop. + */ +static void +pxtcp_pcb_reject(struct netif *netif, struct tcp_pcb *pcb, + struct pbuf *p, int sockerr) +{ + struct netif *oif; + int reset = 0; + + /* temporarily make netif the current one, restored below */ + oif = ip_current_netif(); + ip_current_netif() = netif; + + if (sockerr == ECONNREFUSED) { + reset = 1; + } + else if (PCB_ISIPV6(pcb)) { + if (sockerr == EHOSTDOWN) { + icmp6_dest_unreach(p, ICMP6_DUR_ADDRESS); /* XXX: ??? */ + } + else if (sockerr == EHOSTUNREACH + || sockerr == ENETDOWN + || sockerr == ENETUNREACH) + { + icmp6_dest_unreach(p, ICMP6_DUR_NO_ROUTE); + } + } + else { + if (sockerr == EHOSTDOWN + || sockerr == EHOSTUNREACH + || sockerr == ENETDOWN + || sockerr == ENETUNREACH) + { + icmp_dest_unreach(p, ICMP_DUR_HOST); + } + } + + ip_current_netif() = oif; + + /* reset != 0 only for ECONNREFUSED */ + tcp_abandon(pcb, reset); +} + + +/** + * Called from poll manager thread via pxtcp::msg_accept when proxy + * failed to connect to the destination.
Also called when we failed + * to register pxtcp with poll manager. + * + * This is like pxtcp_pcb_reset_pxtcp() but is more discriminate in + * how this unestablished connection is terminated. + */ +static void +pxtcp_pcb_accept_refuse(void *ctx) +{ + struct pxtcp *pxtcp = (struct pxtcp *)ctx; + + DPRINTF0(("%s: pxtcp %p, pcb %p, sock %d: errno %d\n", + __func__, (void *)pxtcp, (void *)pxtcp->pcb, + pxtcp->sock, pxtcp->sockerr)); + + LWIP_ASSERT1(pxtcp != NULL); + LWIP_ASSERT1(pxtcp->sock == INVALID_SOCKET); + + if (pxtcp->pcb != NULL) { + struct tcp_pcb *pcb = pxtcp->pcb; + pxtcp_pcb_dissociate(pxtcp); + pxtcp_pcb_reject(pxtcp->netif, pcb, pxtcp->unsent, pxtcp->sockerr); + } + + pollmgr_refptr_unref(pxtcp->rp); + pxtcp_free(pxtcp); +} + + +/** + * Convenience wrapper for poll manager connect callback to reject + * connection attempt. + * + * Like pxtcp_schedule_reset(), but the callback is more discriminate + * in how this unestablished connection is terminated. + */ +static int +pxtcp_schedule_reject(struct pxtcp *pxtcp) +{ + pxtcp->msg_accept.msg.cb.function = pxtcp_pcb_accept_refuse; + pxtcp->pmhdl.slot = -1; + proxy_lwip_post(&pxtcp->msg_accept); + return -1; +} + + +/** + * Global tcp_proxy_accept() callback for proxied outgoing TCP + * connections from guest(s). + */ +static err_t +pxtcp_pcb_heard(void *arg, struct tcp_pcb *newpcb, err_t error) +{ + struct pbuf *p = (struct pbuf *)arg; + struct pxtcp *pxtcp; + ipX_addr_t dst_addr; + int sdom; + SOCKET sock; + ssize_t nsent; + int sockerr = 0; + + LWIP_UNUSED_ARG(error); /* always ERR_OK */ + + /* + * TCP first calls accept callback when it receives the first SYN + * and "tentatively accepts" new proxied connection attempt. When + * proxy "confirms" the SYN and sends SYN|ACK and the guest + * replies with ACK the accept callback is called again, this time + * with the established connection. 
+ */ + LWIP_ASSERT1(newpcb->state == SYN_RCVD_0); + tcp_accept(newpcb, pxtcp_pcb_accept); + tcp_arg(newpcb, NULL); + + tcp_setprio(newpcb, TCP_PRIO_MAX); + + pxremap_outbound_ipX(PCB_ISIPV6(newpcb), &dst_addr, &newpcb->local_ip); + + sdom = PCB_ISIPV6(newpcb) ? PF_INET6 : PF_INET; + sock = proxy_connected_socket(sdom, SOCK_STREAM, + &dst_addr, newpcb->local_port); + if (sock == INVALID_SOCKET) { + sockerr = errno; + goto abort; + } + + pxtcp = pxtcp_allocate(); + if (pxtcp == NULL) { + proxy_reset_socket(sock); + goto abort; + } + + /* save initial datagram in case we need to reply with ICMP */ + pbuf_ref(p); + pxtcp->unsent = p; + pxtcp->netif = ip_current_netif(); + + pxtcp_pcb_associate(pxtcp, newpcb); + pxtcp->sock = sock; + + pxtcp->pmhdl.callback = pxtcp_pmgr_connect; + pxtcp->events = POLLOUT; + + nsent = pxtcp_chan_send(POLLMGR_CHAN_PXTCP_ADD, pxtcp); + if (nsent < 0) { + pxtcp->sock = INVALID_SOCKET; + proxy_reset_socket(sock); + pxtcp_pcb_accept_refuse(pxtcp); + return ERR_ABRT; + } + + return ERR_OK; + + abort: + DPRINTF0(("%s: pcb %p, sock %d: errno %d\n", + __func__, (void *)newpcb, sock, sockerr)); + pxtcp_pcb_reject(ip_current_netif(), newpcb, p, sockerr); + return ERR_ABRT; +} + + +/** + * tcp_proxy_accept() callback for accepted proxied outgoing TCP + * connections from guest(s). This is "real" accept with three-way + * handshake completed. + */ +static err_t +pxtcp_pcb_accept(void *arg, struct tcp_pcb *pcb, err_t error) +{ + struct pxtcp *pxtcp = (struct pxtcp *)arg; + + LWIP_UNUSED_ARG(pcb); /* used only in asserts */ + LWIP_UNUSED_ARG(error); /* always ERR_OK */ + + LWIP_ASSERT1(pxtcp != NULL); + LWIP_ASSERT1(pxtcp->pcb = pcb); + LWIP_ASSERT1(pcb->callback_arg == pxtcp); + + /* send any inbound data that are already queued */ + pxtcp_pcb_forward_inbound(pxtcp); + return ERR_OK; +} + + +/** + * Initial poll manager callback for proxied outgoing TCP connections. + * pxtcp_pcb_accept() sets pxtcp::pmhdl::callback to this. 
+ * + * Waits for connect(2) to the destination to complete. On success + * replaces itself with pxtcp_pmgr_pump() callback common to all + * established TCP connections. + */ +static int +pxtcp_pmgr_connect(struct pollmgr_handler *handler, SOCKET fd, int revents) +{ + struct pxtcp *pxtcp; + int sockerr; + + pxtcp = (struct pxtcp *)handler->data; + LWIP_ASSERT1(handler == &pxtcp->pmhdl); + LWIP_ASSERT1(fd == pxtcp->sock); + + if (revents & (POLLNVAL | POLLHUP | POLLERR)) { + if (revents & POLLNVAL) { + pxtcp->sock = INVALID_SOCKET; + pxtcp->sockerr = ETIMEDOUT; + } + else { + socklen_t optlen = (socklen_t)sizeof(sockerr); + int status; + SOCKET s; + + status = getsockopt(pxtcp->sock, SOL_SOCKET, SO_ERROR, + (char *)&pxtcp->sockerr, &optlen); + if (status < 0) { /* should not happen */ + sockerr = errno; /* ??? */ + perror("connect: getsockopt"); + } + else { +#ifndef RT_OS_WINDOWS + errno = pxtcp->sockerr; /* to avoid strerror_r */ +#else + /* see winutils.h */ + WSASetLastError(pxtcp->sockerr); +#endif + perror("connect"); + } + s = pxtcp->sock; + pxtcp->sock = INVALID_SOCKET; + closesocket(s); + } + return pxtcp_schedule_reject(pxtcp); + } + + if (revents & POLLOUT) { /* connect is successful */ + /* confirm accept to the guest */ + proxy_lwip_post(&pxtcp->msg_accept); + + /* + * Switch to common callback used for all established proxied + * connections. + */ + pxtcp->pmhdl.callback = pxtcp_pmgr_pump; + + /* + * Initially we poll for incoming traffic only. Outgoing + * traffic is fast-forwarded by pxtcp_pcb_recv(); if it fails + * it will ask us to poll for POLLOUT too. + */ + pxtcp->events = POLLIN; + return pxtcp->events; + } + + /* should never get here */ + DPRINTF0(("%s: pxtcp %p, sock %d: unexpected revents 0x%x\n", + __func__, (void *)pxtcp, fd, revents)); + return pxtcp_schedule_reset(pxtcp); +} + + +/** + * Called from poll manager thread via pxtcp::msg_accept when proxy + * connected to the destination. 
Finalize accept by sending SYN|ACK + * to the guest. + */ +static void +pxtcp_pcb_accept_confirm(void *ctx) +{ + struct pxtcp *pxtcp = (struct pxtcp *)ctx; + err_t error; + + LWIP_ASSERT1(pxtcp != NULL); + /* no pcb to confirm the accept on */ + if (pxtcp->pcb == NULL) { + return; + } + + /* we are not going to reply with ICMP, so we can drop initial pbuf */ + LWIP_ASSERT1(pxtcp->unsent != NULL); + pbuf_free(pxtcp->unsent); + pxtcp->unsent = NULL; + + error = tcp_proxy_accept_confirm(pxtcp->pcb); + + /* + * If lwIP failed to enqueue SYN|ACK because it's out of pbufs it + * abandons the pcb. Retrying that is not very easy, since it + * would require keeping "fractional state". From guest's point + * of view there is no reply to its SYN so it will either resend + * the SYN (effectively triggering full connection retry for us), + * or it will eventually time out. + */ + if (error == ERR_ABRT) { + pxtcp->pcb = NULL; /* pcb is gone */ + /* ask the poll manager to reset the socket and delete pxtcp */ + pxtcp_chan_send_weak(POLLMGR_CHAN_PXTCP_RESET, pxtcp); + } + + /* + * else if (error != ERR_OK): even if tcp_output() failed with + * ERR_MEM - don't give up, that SYN|ACK is enqueued and will be + * retransmitted eventually. + */ +} + + +/** + * Entry point for port-forwarding. + * + * fwtcp accepts new incoming connection, creates pxtcp for the socket + * (with no pcb yet) and adds it to the poll manager (polling for + * errors only). Then it calls this function to construct the pcb and + * perform connection to the guest.
+ */ +void +pxtcp_pcb_connect(struct pxtcp *pxtcp, const struct fwspec *fwspec) +{ + struct sockaddr_storage ss; + socklen_t sslen; + struct tcp_pcb *pcb; + ipX_addr_t src_addr, dst_addr; + u16_t src_port, dst_port; + int status; + err_t error; + + LWIP_ASSERT1(pxtcp != NULL); + LWIP_ASSERT1(pxtcp->pcb == NULL); + LWIP_ASSERT1(fwspec->stype == SOCK_STREAM); + + pcb = tcp_new(); + if (pcb == NULL) { + goto reset; + } + + tcp_setprio(pcb, TCP_PRIO_MAX); + pxtcp_pcb_associate(pxtcp, pcb); + + /* the address of the external peer becomes the source of the pcb */ + sslen = sizeof(ss); + status = getpeername(pxtcp->sock, (struct sockaddr *)&ss, &sslen); + if (status == SOCKET_ERROR) { + goto reset; + } + + /* nit: compares PF and AF, but they are the same everywhere */ + LWIP_ASSERT1(ss.ss_family == fwspec->sdom); + + status = fwany_ipX_addr_set_src(&src_addr, (const struct sockaddr *)&ss); + if (status == PXREMAP_FAILED) { + goto reset; + } + + if (ss.ss_family == PF_INET) { + const struct sockaddr_in *peer4 = (const struct sockaddr_in *)&ss; + + src_port = peer4->sin_port; + + memcpy(&dst_addr.ip4, &fwspec->dst.sin.sin_addr, sizeof(ip_addr_t)); + dst_port = fwspec->dst.sin.sin_port; + } + else { /* PF_INET6 */ + const struct sockaddr_in6 *peer6 = (const struct sockaddr_in6 *)&ss; + ip_set_v6(pcb, 1); + + src_port = peer6->sin6_port; + + memcpy(&dst_addr.ip6, &fwspec->dst.sin6.sin6_addr, sizeof(ip6_addr_t)); + dst_port = fwspec->dst.sin6.sin6_port; + } + + /* lwip port arguments are in host order */ + src_port = ntohs(src_port); + dst_port = ntohs(dst_port); + + error = tcp_proxy_bind(pcb, ipX_2_ip(&src_addr), src_port); + if (error != ERR_OK) { + goto reset; + } + + error = tcp_connect(pcb, ipX_2_ip(&dst_addr), dst_port, + /* callback: */ pxtcp_pcb_connected); + if (error != ERR_OK) { + goto reset; + } + + return; + + reset: + /* ask the poll manager to reset the socket and delete pxtcp */ + pxtcp_chan_send_weak(POLLMGR_CHAN_PXTCP_RESET, pxtcp); +} + + +/** + * Port-forwarded connection to guest is successful, pump data.
+ */ +static err_t +pxtcp_pcb_connected(void *arg, struct tcp_pcb *pcb, err_t error) +{ + struct pxtcp *pxtcp = (struct pxtcp *)arg; + + LWIP_ASSERT1(error == ERR_OK); /* always called with ERR_OK */ + LWIP_UNUSED_ARG(error); + + LWIP_ASSERT1(pxtcp != NULL); + LWIP_ASSERT1(pxtcp->pcb == pcb); + LWIP_ASSERT1(pcb->callback_arg == pxtcp); + LWIP_UNUSED_ARG(pcb); + + DPRINTF0(("%s: new pxtcp %p; pcb %p; sock %d\n", + __func__, (void *)pxtcp, (void *)pxtcp->pcb, pxtcp->sock)); + + /* ACK on connection is like ACK on data in pxtcp_pcb_sent() */ + pxtcp_chan_send_weak(POLLMGR_CHAN_PXTCP_POLLIN, pxtcp); + + return ERR_OK; +} + + +/** + * tcp_recv() callback: outbound data from the guest arrived on the + * pcb, or the guest closed its side (p == NULL). + * + * Returns ERR_WOULDBLOCK for data that arrive while a previous pbuf + * chain is still unsent, so that lwIP holds onto the new pbuf. + * Otherwise returns ERR_OK for a close or the result of + * pxtcp_pcb_forward_outbound() for data. + */ +static err_t +pxtcp_pcb_recv(void *arg, struct tcp_pcb *pcb, struct pbuf *p, err_t error) +{ + struct pxtcp *pxtcp = (struct pxtcp *)arg; + + LWIP_ASSERT1(error == ERR_OK); /* always called with ERR_OK */ + LWIP_UNUSED_ARG(error); + + LWIP_ASSERT1(pxtcp != NULL); + LWIP_ASSERT1(pxtcp->pcb == pcb); + LWIP_ASSERT1(pcb->callback_arg == pxtcp); + LWIP_UNUSED_ARG(pcb); + + + /* + * Are we still busy sending the previous batch? + */ + if (pxtcp->unsent != NULL) { + if (p != NULL) { + /* + * Return an error to tell TCP to hold onto that pbuf. + * It will be presented to us later from tcp_fasttmr(). + */ + return ERR_WOULDBLOCK; + } + else { + /* + * Unlike data, p == NULL indicating orderly shutdown is + * NOT presented to us again, so only record it here. + */ + pxtcp->outbound_close = 1; + return ERR_OK; + } + } + + + /* + * Guest closed? + */ + if (p == NULL) { + pxtcp->outbound_close = 1; + pxtcp_pcb_forward_outbound_close(pxtcp); + return ERR_OK; + } + + + /* + * Got data, send what we can without blocking. + */ + return pxtcp_pcb_forward_outbound(pxtcp, p); +} + + +/** + * Guest half-closed its TX side of the connection.
+ * + * Called either immediately from pxtcp_pcb_recv() when it gets NULL, + * or from pxtcp_pcb_forward_outbound() when it finishes forwarding + * previously unsent data and sees pxtcp::outbound_close flag saved by + * pxtcp_pcb_recv(). + */ +static void +pxtcp_pcb_forward_outbound_close(struct pxtcp *pxtcp) +{ + struct tcp_pcb *pcb; + + LWIP_ASSERT1(pxtcp != NULL); + LWIP_ASSERT1(pxtcp->outbound_close); + LWIP_ASSERT1(!pxtcp->outbound_close_done); + + pcb = pxtcp->pcb; + LWIP_ASSERT1(pcb != NULL); + + DPRINTF(("outbound_close: pxtcp %p; pcb %p %s\n", + (void *)pxtcp, (void *)pcb, tcp_debug_state_str(pcb->state))); + + + /* + * NB: set the flag first, since shutdown() will trigger POLLHUP + * if inbound is already closed, and poll manager asserts + * outbound_close_done (may be it should not?). + */ + pxtcp->outbound_close_done = 1; + shutdown(pxtcp->sock, SHUT_WR); /* half-close the socket */ + +#if !HAVE_TCP_POLLHUP + /* + * On NetBSD POLLHUP is not reported for TCP sockets, so we need + * to nudge poll manager manually. + */ + if (pxtcp->inbound_close) { + pxtcp_chan_send_weak(POLLMGR_CHAN_PXTCP_DEL, pxtcp); + } +#endif + + + /* no more outbound data coming to us */ + tcp_recv(pcb, NULL); + + /* + * If we have already done inbound close previously (active close + * on the pcb), then we must not hold onto a pcb in TIME_WAIT + * state since those will be recycled by lwip when it runs out of + * free pcbs in the pool. + * + * The test is true also for a pcb in CLOSING state that waits + * just for the ACK of its FIN (to transition to TIME_WAIT). + */ + if (pxtcp_pcb_forward_inbound_done(pxtcp)) { + pxtcp_pcb_dissociate(pxtcp); + } +} + + +/** + * Forward outbound data from pcb to socket. + * + * Called by pxtcp_pcb_recv() to forward new data and by callout + * triggered by POLLOUT on the socket to send previously unsent data. + * + * (Re)scehdules one-time callout if not all data are sent. 
+ */ +static err_t +pxtcp_pcb_forward_outbound(struct pxtcp *pxtcp, struct pbuf *p) +{ + struct pbuf *qs, *q; + size_t qoff; + size_t forwarded; + int sockerr; + +#if defined(MSG_NOSIGNAL) + const int send_flags = MSG_NOSIGNAL; +#else + const int send_flags = 0; +#endif + + + LWIP_ASSERT1(pxtcp->unsent == NULL || pxtcp->unsent == p); + + forwarded = 0; + sockerr = 0; + + q = NULL; + qoff = 0; + + qs = p; + while (qs != NULL) { +#ifndef RT_OS_WINDOWS + struct msghdr mh; +#else + int rc; +#endif + IOVEC iov[8]; + const size_t iovsize = sizeof(iov)/sizeof(iov[0]); + size_t fwd1; + ssize_t nsent; + size_t i; + + fwd1 = 0; + for (i = 0, q = qs; i < iovsize && q != NULL; ++i, q = q->next) { + LWIP_ASSERT1(q->len > 0); + IOVEC_SET_BASE(iov[i], q->payload); + IOVEC_SET_LEN(iov[i], q->len); + fwd1 += q->len; + } + +#ifndef RT_OS_WINDOWS + memset(&mh, 0, sizeof(mh)); + mh.msg_iov = iov; + mh.msg_iovlen = i; + + nsent = sendmsg(pxtcp->sock, &mh, send_flags); +#else + /** + * WSASend(,,,DWORD *,,,) - takes SSIZE_T (64bit value) ... so all nsent's + * bits should be zeroed before passing to WSASent. + */ + nsent = 0; + rc = WSASend(pxtcp->sock, iov, (DWORD)i, (DWORD *)&nsent, 0, NULL, NULL); + if (rc == SOCKET_ERROR) { + /* WSASent reports SOCKET_ERROR and updates error accessible with + * WSAGetLastError(). We assign nsent to -1, enforcing code below + * to access error in BSD style. 
+ */ + warn("pxtcp_pcb_forward_outbound:WSASend error:%d nsent:%d\n", + WSAGetLastError(), + nsent); + nsent = -1; + } +#endif + + if (nsent == (ssize_t)fwd1) { + /* successfully sent this chain fragment completely */ + forwarded += nsent; + qs = q; + } + else if (nsent >= 0) { + /* successfully sent only some data */ + forwarded += nsent; + + /* find the first pbuf that was not completely forwarded */ + qoff = nsent; + for (i = 0, q = qs; i < iovsize && q != NULL; ++i, q = q->next) { + if (qoff < q->len) { + break; + } + qoff -= q->len; + } + LWIP_ASSERT1(q != NULL); + LWIP_ASSERT1(qoff < q->len); + break; + } + else { + /* + * Some errors are really not errors - if we get them, + * it's not different from getting nsent == 0, so filter + * them out here. + */ + if (errno != EWOULDBLOCK + && errno != EAGAIN + && errno != ENOBUFS + && errno != ENOMEM + && errno != EINTR) + { + sockerr = errno; + } + q = qs; + qoff = 0; + break; + } + } + + if (forwarded > 0) { + tcp_recved(pxtcp->pcb, (u16_t)forwarded); + } + + if (q == NULL) { /* everything is forwarded? */ + LWIP_ASSERT1(sockerr == 0); + LWIP_ASSERT1(forwarded == p->tot_len); + + pxtcp->unsent = NULL; + pbuf_free(p); + if (pxtcp->outbound_close) { + pxtcp_pcb_forward_outbound_close(pxtcp); + } + } + else { + if (q != p) { + /* free forwarded pbufs at the beginning of the chain */ + pbuf_ref(q); + pbuf_free(p); + } + if (qoff > 0) { + /* advance payload pointer past the forwarded part */ + pbuf_header(q, -(s16_t)qoff); + } + pxtcp->unsent = q; + + /* + * Have sendmsg() failed? + * + * Connection reset will be detected by poll and + * pxtcp_schedule_reset() will be called. + * + * Otherwise something *really* unexpected must have happened, + * so we'd better abort. 
+ */ + if (sockerr != 0 && sockerr != ECONNRESET) { + struct tcp_pcb *pcb = pxtcp->pcb; + pxtcp_pcb_dissociate(pxtcp); + + tcp_abort(pcb); + + /* call error callback manually since we've already dissociated */ + pxtcp_pcb_err((void *)pxtcp, ERR_ABRT); + return ERR_ABRT; + } + + /* schedule one-shot POLLOUT on the socket */ + pxtcp_chan_send_weak(POLLMGR_CHAN_PXTCP_POLLOUT, pxtcp); + } + return ERR_OK; +} + + +/** + * Callback from poll manager (on POLLOUT) to send data from + * pxtcp::unsent pbuf to socket. + */ +static void +pxtcp_pcb_write_outbound(void *ctx) +{ + struct pxtcp *pxtcp = (struct pxtcp *)ctx; + LWIP_ASSERT1(pxtcp != NULL); + + if (pxtcp->pcb == NULL) { + return; + } + + pxtcp_pcb_forward_outbound(pxtcp, pxtcp->unsent); +} + + +/** + * Common poll manager callback used by both outgoing and incoming + * (port-forwarded) connections that has connected socket. + */ +static int +pxtcp_pmgr_pump(struct pollmgr_handler *handler, SOCKET fd, int revents) +{ + struct pxtcp *pxtcp; + int status; + int sockerr; + + pxtcp = (struct pxtcp *)handler->data; + LWIP_ASSERT1(handler == &pxtcp->pmhdl); + LWIP_ASSERT1(fd == pxtcp->sock); + + if (revents & POLLNVAL) { + pxtcp->sock = INVALID_SOCKET; + return pxtcp_schedule_reset(pxtcp); + } + + if (revents & POLLERR) { + socklen_t optlen = (socklen_t)sizeof(sockerr); + + status = getsockopt(pxtcp->sock, SOL_SOCKET, SO_ERROR, + (char *)&sockerr, &optlen); + if (status < 0) { /* should not happen */ + perror("getsockopt"); + sockerr = ECONNRESET; + } + + DPRINTF0(("sock %d: errno %d\n", fd, sockerr)); + return pxtcp_schedule_reset(pxtcp); + } + + if (revents & POLLOUT) { + pxtcp->events &= ~POLLOUT; + proxy_lwip_post(&pxtcp->msg_outbound); + } + + if (revents & POLLIN) { + ssize_t nread; + int stop_pollin; + + nread = pxtcp_sock_read(pxtcp, &stop_pollin); + if (nread < 0) { + sockerr = -(int)nread; + DPRINTF0(("sock %d: errno %d\n", fd, sockerr)); + return pxtcp_schedule_reset(pxtcp); + } + + if (stop_pollin) { + 
pxtcp->events &= ~POLLIN; + } + + if (nread > 0) { + proxy_lwip_post(&pxtcp->msg_inbound); +#if !HAVE_TCP_POLLHUP + /* + * If host does not report POLLHUP for closed sockets + * (e.g. NetBSD) we should check for full close manually. + */ + if (pxtcp->inbound_close && pxtcp->outbound_close_done) { + LWIP_ASSERT1((revents & POLLHUP) == 0); + return pxtcp_schedule_delete(pxtcp); + } +#endif + } + } + +#if !HAVE_TCP_POLLHUP + LWIP_ASSERT1((revents & POLLHUP) == 0); +#else + if (revents & POLLHUP) { + /* + * Linux and Darwin seems to report POLLHUP when both + * directions are shut down. And they do report POLLHUP even + * when there's unread data (which they aslo report as POLLIN + * along with that POLLHUP). + * + * FreeBSD (from source inspection) seems to follow Linux, + * reporting POLLHUP when both directions are shut down, but + * POLLHUP is always accompanied with POLLIN. + * + * NetBSD never reports POLLHUP for sockets. + * + * --- + * + * If external half-closes first, we don't get POLLHUP, we + * recv 0 bytes from the socket as EOF indicator, stop polling + * for POLLIN and poll with events == 0 (with occasional + * one-shot POLLOUT). When guest eventually closes, we get + * POLLHUP. + * + * If guest half-closes first things are more tricky. As soon + * as host sees the FIN from external it will spam POLLHUP, + * even when there's unread data. The problem is that we + * might have stopped polling for POLLIN because the ring + * buffer is full or we were polling POLLIN but can't read all + * of the data becuase buffer doesn't have enough space. + * Either way, there's unread data but we can't keep polling + * the socket. 
+ */ + DPRINTF(("sock %d: HUP\n", fd)); + LWIP_ASSERT1(pxtcp->outbound_close_done); + + if (pxtcp->inbound_close) { + /* there's no unread data, we are done */ + return pxtcp_schedule_delete(pxtcp); + } + else { + /* DPRINTF */ { +#ifndef RT_OS_WINDOWS + int unread; +#else + u_long unread; +#endif + status = ioctlsocket(fd, FIONREAD, &unread); + if (status == SOCKET_ERROR) { + perror("FIONREAD"); + } + else { + DPRINTF2(("sock %d: %d UNREAD bytes\n", fd, unread)); + } + } + + /* + * We cannot just set a flag here and let pxtcp_pcb_sent() + * notice and start pulling, because if we are preempted + * before setting the flag and all data in inbuf is ACKed + * there will be no more calls to pxtcp_pcb_sent() to + * notice the flag. + * + * We cannot set a flag and then send a message to make + * sure it noticed, because if it has and it has read all + * data while the message is in transit it will delete + * pxtcp. + * + * In a sense this message is like msg_delete (except we + * ask to pull some data first). + */ + proxy_lwip_post(&pxtcp->msg_inpull); + pxtcp->pmhdl.slot = -1; + return -1; + } + /* NOTREACHED */ + } /* POLLHUP */ +#endif /* HAVE_TCP_POLLHUP */ + + return pxtcp->events; +} + + +/** + * Read data from socket to ringbuf. This may be used both on lwip + * and poll manager threads. + * + * Flag pointed to by pstop is set when further reading is impossible, + * either temporary when buffer is full, or permanently when EOF is + * received. + * + * Returns number of bytes read. NB: EOF is reported as 1! + * + * Returns zero if nothing was read, either because buffer is full, or + * if no data is available (EAGAIN, EINTR &c). + * + * Returns -errno on real socket errors. 
+ */ +static ssize_t +pxtcp_sock_read(struct pxtcp *pxtcp, int *pstop) +{ + IOVEC iov[2]; +#ifndef RT_OS_WINDOWS + struct msghdr mh; +#else + DWORD dwFlags; + int rc; +#endif + int iovlen; + ssize_t nread; + + const size_t sz = pxtcp->inbuf.bufsize; + size_t beg, lim, wrnew; + + *pstop = 0; + +#ifndef RT_OS_WINDOWS + memset(&mh, 0, sizeof(mh)); + mh.msg_iov = iov; +#endif + + beg = pxtcp->inbuf.vacant; + IOVEC_SET_BASE(iov[0], &pxtcp->inbuf.buf[beg]); + + /* lim is the index we can NOT write to */ + lim = pxtcp->inbuf.unacked; + if (lim == 0) { + lim = sz - 1; /* empty slot at the end */ + } + else if (lim == 1) { + lim = sz; /* empty slot at the beginning */ + } + else { + --lim; + } + + if (beg == lim) { + /* + * Buffer is full, stop polling for POLLIN. + * + * pxtcp_pcb_sent() will re-enable POLLIN when guest ACKs + * data, freeing space in the ring buffer. + */ + *pstop = 1; + return 0; + } + + if (beg < lim) { + /* free space in one chunk */ + iovlen = 1; + IOVEC_SET_LEN(iov[0], lim - beg); + } + else { + /* free space in two chunks */ + iovlen = 2; + IOVEC_SET_LEN(iov[0], sz - beg); + IOVEC_SET_BASE(iov[1], &pxtcp->inbuf.buf[0]); + IOVEC_SET_LEN(iov[1], lim); + } + +#ifndef RT_OS_WINDOWS + mh.msg_iovlen = iovlen; + nread = recvmsg(pxtcp->sock, &mh, 0); +#else + dwFlags = 0; + /* We can't assign nread to -1 expecting, that we'll got it back in case of error, + * instead, WSARecv(,,,DWORD *,,,) will rewrite only half of the 64bit value. 
+ */ + nread = 0; + rc = WSARecv(pxtcp->sock, iov, iovlen, (DWORD *)&nread, &dwFlags, NULL, NULL); + if (rc == SOCKET_ERROR) { + warn("pxtcp_sock_read:WSARecv(%d) error:%d nread:%d\n", + pxtcp->sock, + WSAGetLastError(), + nread); + nread = -1; + } + + if (dwFlags) { + warn("pxtcp_sock_read:WSARecv(%d) dwFlags:%x nread:%d\n", + pxtcp->sock, + WSAGetLastError(), + nread); + } +#endif + + if (nread > 0) { + wrnew = beg + nread; + if (wrnew >= sz) { + wrnew -= sz; + } + pxtcp->inbuf.vacant = wrnew; + DPRINTF2(("pxtcp %p: sock %d read %d bytes\n", + (void *)pxtcp, pxtcp->sock, (int)nread)); + return nread; + } + else if (nread == 0) { + *pstop = 1; + pxtcp->inbound_close = 1; + DPRINTF2(("pxtcp %p: sock %d read EOF\n", + (void *)pxtcp, pxtcp->sock)); + return 1; + } + else if (errno == EWOULDBLOCK || errno == EAGAIN || errno == EINTR) { + /* haven't read anything, just return */ + DPRINTF2(("pxtcp %p: sock %d read cancelled\n", + (void *)pxtcp, pxtcp->sock)); + return 0; + } + else { + /* socket error! */ + DPRINTF0(("pxtcp %p: sock %d read errno %d\n", + (void *)pxtcp, pxtcp->sock, errno)); + return -errno; + } +} + + +/** + * Callback from poll manager (pxtcp::msg_inbound) to trigger output + * from ringbuf to guest. + */ +static void +pxtcp_pcb_write_inbound(void *ctx) +{ + struct pxtcp *pxtcp = (struct pxtcp *)ctx; + LWIP_ASSERT1(pxtcp != NULL); + + if (pxtcp->pcb == NULL) { + return; + } + + pxtcp_pcb_forward_inbound(pxtcp); +} + + +/** + * tcp_poll() callback + * + * We swtich it on when tcp_write() or tcp_shutdown() fail with + * ERR_MEM to prevent connection from stalling. If there are ACKs or + * more inbound data then pxtcp_pcb_forward_inbound() will be + * triggered again, but if neither happens, tcp_poll() comes to the + * rescue. 
+ */ +static err_t +pxtcp_pcb_poll(void *arg, struct tcp_pcb *pcb) +{ + struct pxtcp *pxtcp = (struct pxtcp *)arg; + LWIP_UNUSED_ARG(pcb); + + DPRINTF2(("%s: pxtcp %p; pcb %p\n", + __func__, (void *)pxtcp, (void *)pxtcp->pcb)); + + pxtcp_pcb_forward_inbound(pxtcp); + + /* + * If the last thing holding up deletion of the pxtcp was failed + * tcp_shutdown() and it succeeded, we may be the last callback. + */ + pxtcp_pcb_maybe_deferred_delete(pxtcp); + + return ERR_OK; +} + + +static void +pxtcp_pcb_schedule_poll(struct pxtcp *pxtcp) +{ + tcp_poll(pxtcp->pcb, pxtcp_pcb_poll, 0); +} + + +static void +pxtcp_pcb_cancel_poll(struct pxtcp *pxtcp) +{ + tcp_poll(pxtcp->pcb, NULL, 255); +} + + +/** + * Forward inbound data from ring buffer to the guest. + * + * Scheduled by poll manager thread after it receives more data into + * the ring buffer (we have more data to send). + + * Also called from tcp_sent() callback when guest ACKs some data, + * increasing pcb->snd_buf (we are permitted to send more data). + * + * Also called from tcp_poll() callback if previous attempt to forward + * inbound data failed with ERR_MEM (we need to try again). + */ +static void +pxtcp_pcb_forward_inbound(struct pxtcp *pxtcp) +{ + struct tcp_pcb *pcb; + size_t sndbuf; + size_t beg, lim, sndlim; + size_t toeob, tolim; + size_t nsent; + err_t error; + + LWIP_ASSERT1(pxtcp != NULL); + pcb = pxtcp->pcb; + if (pcb == NULL) { + return; + } + + if (/* __predict_false */ pcb->state < ESTABLISHED) { + /* + * If we have just confirmed accept of this connection, the + * pcb is in SYN_RCVD state and we still haven't received the + * ACK of our SYN. It's only in SYN_RCVD -> ESTABLISHED + * transition that lwip decrements pcb->acked so that that ACK + * is not reported to pxtcp_pcb_sent(). If we send something + * now and immediately close (think "daytime", e.g.) 
while + * still in SYN_RCVD state, we will move directly to + * FIN_WAIT_1 and when our confirming SYN is ACK'ed lwip will + * report it to pxtcp_pcb_sent(). + */ + DPRINTF2(("forward_inbound: pxtcp %p; pcb %p %s - later...\n", + (void *)pxtcp, (void *)pcb, tcp_debug_state_str(pcb->state))); + return; + } + + + beg = pxtcp->inbuf.unsent; /* private to lwip thread */ + lim = pxtcp->inbuf.vacant; + + if (beg == lim) { + if (pxtcp->inbound_close && !pxtcp->inbound_close_done) { + pxtcp_pcb_forward_inbound_close(pxtcp); + tcp_output(pcb); + return; + } + + /* + * Else, there's no data to send. + * + * If there is free space in the buffer, producer will + * reschedule us as it receives more data and vacant (lim) + * advances. + * + * If buffer is full when all data have been passed to + * tcp_write() but not yet acknowledged, we will advance + * unacked on ACK, freeing some space for producer to write to + * (then see above). + */ + return; + } + + sndbuf = tcp_sndbuf(pcb); + if (sndbuf == 0) { + /* + * Can't send anything now. As guest ACKs some data, TCP will + * call pxtcp_pcb_sent() callback and we will come here again. + */ + return; + } + + nsent = 0; + + /* + * We have three limits to consider: + * - how much data we have in the ringbuf + * - how much data we are allowed to send + * - ringbuf size + */ + toeob = pxtcp->inbuf.bufsize - beg; + if (lim < beg) { /* lim wrapped */ + if (sndbuf < toeob) { /* but we are limited by sndbuf */ + /* so beg is not going to wrap, treat sndbuf as lim */ + lim = beg + sndbuf; /* ... 
and proceed to the simple case */ + } + else { /* we are limited by the end of the buffer, beg will wrap */ + u8_t maybemore; + if (toeob == sndbuf || lim == 0) { + maybemore = 0; + } + else { + maybemore = TCP_WRITE_FLAG_MORE; + } + + error = tcp_write(pcb, &pxtcp->inbuf.buf[beg], toeob, maybemore); + if (error != ERR_OK) { + goto writeerr; + } + nsent += toeob; + pxtcp->inbuf.unsent = 0; /* wrap */ + + if (maybemore) { + beg = 0; + sndbuf -= toeob; + } + else { + /* we are done sending, but ... */ + goto check_inbound_close; + } + } + } + + LWIP_ASSERT1(beg < lim); + sndlim = beg + sndbuf; + if (lim > sndlim) { + lim = sndlim; + } + tolim = lim - beg; + if (tolim > 0) { + error = tcp_write(pcb, &pxtcp->inbuf.buf[beg], (u16_t)tolim, 0); + if (error != ERR_OK) { + goto writeerr; + } + nsent += tolim; + pxtcp->inbuf.unsent = lim; + } + + check_inbound_close: + if (pxtcp->inbound_close && pxtcp->inbuf.unsent == pxtcp->inbuf.vacant) { + pxtcp_pcb_forward_inbound_close(pxtcp); + } + + DPRINTF2(("forward_inbound: pxtcp %p, pcb %p: sent %d bytes\n", + (void *)pxtcp, (void *)pcb, (int)nsent)); + tcp_output(pcb); + pxtcp_pcb_cancel_poll(pxtcp); + return; + + writeerr: + if (error == ERR_MEM) { + if (nsent > 0) { /* first write succeeded, second failed */ + DPRINTF2(("forward_inbound: pxtcp %p, pcb %p: sent %d bytes only\n", + (void *)pxtcp, (void *)pcb, (int)nsent)); + tcp_output(pcb); + } + DPRINTF(("forward_inbound: pxtcp %p, pcb %p: ERR_MEM\n", + (void *)pxtcp, (void *)pcb)); + pxtcp_pcb_schedule_poll(pxtcp); + } + else { + DPRINTF(("forward_inbound: pxtcp %p, pcb %p: %s\n", + (void *)pxtcp, (void *)pcb, proxy_lwip_strerr(error))); + + /* XXX: We shouldn't get ERR_ARG. Check ERR_CONN conditions early? 
*/ + LWIP_ASSERT1(error == ERR_MEM); + } +} + + +static void +pxtcp_pcb_forward_inbound_close(struct pxtcp *pxtcp) +{ + struct tcp_pcb *pcb; + err_t error; + + LWIP_ASSERT1(pxtcp != NULL); + LWIP_ASSERT1(pxtcp->inbound_close); + LWIP_ASSERT1(!pxtcp->inbound_close_done); + LWIP_ASSERT1(pxtcp->inbuf.unsent == pxtcp->inbuf.vacant); + + pcb = pxtcp->pcb; + LWIP_ASSERT1(pcb != NULL); + + DPRINTF(("inbound_close: pxtcp %p; pcb %p: %s\n", + (void *)pxtcp, (void *)pcb, tcp_debug_state_str(pcb->state))); + + error = tcp_shutdown(pcb, /*RX*/ 0, /*TX*/ 1); + if (error != ERR_OK) { + DPRINTF(("inbound_close: pxtcp %p; pcb %p:" + " tcp_shutdown: error=%s\n", + (void *)pxtcp, (void *)pcb, proxy_lwip_strerr(error))); + pxtcp_pcb_schedule_poll(pxtcp); + return; + } + + pxtcp_pcb_cancel_poll(pxtcp); + pxtcp->inbound_close_done = 1; + + + /* + * If we have already done outbound close previously (passive + * close on the pcb), then we must not hold onto a pcb in LAST_ACK + * state since those will be deleted by lwip when that last ack + * comes from the guest. + * + * NB: We do NOT check for deferred delete here, even though we + * have just set one of its conditions, inbound_close_done. We + * let pcb callbacks that called us do that. It's simpler and + * cleaner that way. + */ + if (pxtcp->outbound_close_done && pxtcp_pcb_forward_inbound_done(pxtcp)) { + pxtcp_pcb_dissociate(pxtcp); + } +} + + +/** + * Check that all forwarded inbound data is sent and acked, and that + * inbound close is scheduled (we aren't called back when it's acked). + */ +DECLINLINE(int) +pxtcp_pcb_forward_inbound_done(const struct pxtcp *pxtcp) +{ + return (pxtcp->inbound_close_done /* also implies that all data forwarded */ + && pxtcp->inbuf.unacked == pxtcp->inbuf.unsent); +} + + +/** + * tcp_sent() callback - guest acknowledged len bytes. + * + * We can advance inbuf::unacked index, making more free space in the + * ringbuf and wake up producer on poll manager thread. 
+ * + * We can also try to send more data if we have any since pcb->snd_buf + * was increased and we are now permitted to send more. + */ +static err_t +pxtcp_pcb_sent(void *arg, struct tcp_pcb *pcb, u16_t len) +{ + struct pxtcp *pxtcp = (struct pxtcp *)arg; + size_t unacked; + + LWIP_ASSERT1(pxtcp != NULL); + LWIP_ASSERT1(pxtcp->pcb == pcb); + LWIP_ASSERT1(pcb->callback_arg == pxtcp); + LWIP_UNUSED_ARG(pcb); /* only in assert */ + + DPRINTF2(("%s: pxtcp %p; pcb %p: +%d ACKed:" + " unacked %d, unsent %d, vacant %d\n", + __func__, (void *)pxtcp, (void *)pcb, (int)len, + (int)pxtcp->inbuf.unacked, + (int)pxtcp->inbuf.unsent, + (int)pxtcp->inbuf.vacant)); + + if (/* __predict_false */ len == 0) { + /* we are notified to start pulling */ + LWIP_ASSERT1(pxtcp->outbound_close_done); + LWIP_ASSERT1(!pxtcp->inbound_close); + LWIP_ASSERT1(pxtcp->inbound_pull); + + unacked = pxtcp->inbuf.unacked; + } + else { + /* + * Advance unacked index. Guest acknowledged the data, so it + * won't be needed again for potential retransmits. 
+ */ + unacked = pxtcp->inbuf.unacked + len; + if (unacked > pxtcp->inbuf.bufsize) { + unacked -= pxtcp->inbuf.bufsize; + } + pxtcp->inbuf.unacked = unacked; + } + + /* arrange for more inbound data */ + if (!pxtcp->inbound_close) { + if (!pxtcp->inbound_pull) { + /* wake up producer, in case it has stopped polling for POLLIN */ + pxtcp_chan_send_weak(POLLMGR_CHAN_PXTCP_POLLIN, pxtcp); +#ifdef RT_OS_WINDOWS + /** + * We have't got enought room in ring buffer to read atm, + * but we don't want to lose notification from WSAW4ME when + * space would be available, so we reset event with empty recv + */ + recv(pxtcp->sock, NULL, 0, 0); +#endif + } + else { + ssize_t nread; + int stop_pollin; /* ignored */ + + nread = pxtcp_sock_read(pxtcp, &stop_pollin); + + if (nread < 0) { + int sockerr = -(int)nread; + LWIP_UNUSED_ARG(sockerr); + DPRINTF0(("%s: sock %d: errno %d\n", + __func__, pxtcp->sock, sockerr)); + + /* + * Since we are pulling, pxtcp is no longer registered + * with poll manager so we can kill it directly. + */ + pxtcp_pcb_reset_pxtcp(pxtcp); + return ERR_ABRT; + } + } + } + + /* forward more data if we can */ + if (!pxtcp->inbound_close_done) { + pxtcp_pcb_forward_inbound(pxtcp); + + /* + * NB: we might have dissociated from a pcb that transitioned + * to LAST_ACK state, so don't refer to pcb below. + */ + } + + + /* have we got all the acks? */ + if (pxtcp->inbound_close /* no more new data */ + && pxtcp->inbuf.unsent == pxtcp->inbuf.vacant /* all data is sent */ + && unacked == pxtcp->inbuf.unsent) /* ... 
and is acked */ + { + char *buf; + + DPRINTF(("%s: pxtcp %p; pcb %p; all data ACKed\n", + __func__, (void *)pxtcp, (void *)pxtcp->pcb)); + + /* no more retransmits, so buf is not needed */ + buf = pxtcp->inbuf.buf; + pxtcp->inbuf.buf = NULL; + free(buf); + + /* no more acks, so no more callbacks */ + if (pxtcp->pcb != NULL) { + tcp_sent(pxtcp->pcb, NULL); + } + + /* + * We may be the last callback for this pcb if we have also + * successfully forwarded inbound_close. + */ + pxtcp_pcb_maybe_deferred_delete(pxtcp); + } + + return ERR_OK; +} + + +/** + * Callback from poll manager (pxtcp::msg_inpull) to switch + * pxtcp_pcb_sent() to actively pull the last bits of input. See + * POLLHUP comment in pxtcp_pmgr_pump(). + * + * pxtcp::sock is deregistered from poll manager after this callback + * is scheduled. + */ +static void +pxtcp_pcb_pull_inbound(void *ctx) +{ + struct pxtcp *pxtcp = (struct pxtcp *)ctx; + LWIP_ASSERT1(pxtcp != NULL); + + if (pxtcp->pcb == NULL) { + DPRINTF(("%s: pxtcp %p: PCB IS GONE\n", __func__, (void *)pxtcp)); + pxtcp_pcb_reset_pxtcp(pxtcp); + return; + } + + DPRINTF(("%s: pxtcp %p: pcb %p\n", + __func__, (void *)pxtcp, (void *)pxtcp->pcb)); + pxtcp->inbound_pull = 1; + pxtcp->deferred_delete = 1; + pxtcp_pcb_sent(pxtcp, pxtcp->pcb, 0); +} + + +/** + * tcp_err() callback. + * + * pcb is not passed to this callback since it may be already + * deallocated by the stack, but we can't do anything useful with it + * anyway since connection is gone. + */ +static void +pxtcp_pcb_err(void *arg, err_t error) +{ + struct pxtcp *pxtcp = (struct pxtcp *)arg; + LWIP_ASSERT1(pxtcp != NULL); + + /* + * ERR_CLSD is special - it is reported here when: + * + * . guest has already half-closed + * . we send FIN to guest when external half-closes + * . guest acks that FIN + * + * Since connection is closed but receive has been already closed + * lwip can only report this via tcp_err. At this point the pcb + * is still alive, so we can peek at it if need be. 
+ * + * The interesting twist is when the ACK from guest that akcs our + * FIN also acks some data. In this scenario lwip will NOT call + * tcp_sent() callback with the ACK for that last bit of data but + * instead will call tcp_err with ERR_CLSD right away. Since that + * ACK also acknowledges all the data, we should run some of + * pxtcp_pcb_sent() logic here. + */ + if (error == ERR_CLSD) { + struct tcp_pcb *pcb = pxtcp->pcb; /* still alive */ + + DPRINTF2(("ERR_CLSD: pxtcp %p; pcb %p:" + " pcb->acked %d;" + " unacked %d, unsent %d, vacant %d\n", + (void *)pxtcp, (void *)pcb, + pcb->acked, + (int)pxtcp->inbuf.unacked, + (int)pxtcp->inbuf.unsent, + (int)pxtcp->inbuf.vacant)); + + LWIP_ASSERT1(pxtcp->pcb == pcb); + LWIP_ASSERT1(pcb->callback_arg == pxtcp); + + if (pcb->acked > 0) { + pxtcp_pcb_sent(pxtcp, pcb, pcb->acked); + } + return; + } + + DPRINTF0(("tcp_err: pxtcp=%p, error=%s\n", + (void *)pxtcp, proxy_lwip_strerr(error))); + + pxtcp->pcb = NULL; /* pcb is gone */ + if (pxtcp->deferred_delete) { + pxtcp_pcb_reset_pxtcp(pxtcp); + } + else { + pxtcp_chan_send_weak(POLLMGR_CHAN_PXTCP_RESET, pxtcp); + } +} diff --git a/src/VBox/NetworkServices/NAT/pxtcp.h b/src/VBox/NetworkServices/NAT/pxtcp.h new file mode 100644 index 00000000..26a7c230 --- /dev/null +++ b/src/VBox/NetworkServices/NAT/pxtcp.h @@ -0,0 +1,16 @@ +/* -*- indent-tabs-mode: nil; -*- */ +#ifndef _pxtcp_h_ +#define _pxtcp_h_ + +struct pxtcp; +struct fwspec; + +struct pxtcp *pxtcp_create_forwarded(SOCKET); +void pxtcp_cancel_forwarded(struct pxtcp *); + +void pxtcp_pcb_connect(struct pxtcp *, const struct fwspec *); + +int pxtcp_pmgr_add(struct pxtcp *); +void pxtcp_pmgr_del(struct pxtcp *); + +#endif /* _pxtcp_h_ */ diff --git a/src/VBox/NetworkServices/NAT/pxudp.c b/src/VBox/NetworkServices/NAT/pxudp.c new file mode 100644 index 00000000..9213c7a2 --- /dev/null +++ b/src/VBox/NetworkServices/NAT/pxudp.c @@ -0,0 +1,660 @@ +/* -*- indent-tabs-mode: nil; -*- */ + +#include "winutils.h" +#include 
"proxy.h" +#include "proxy_pollmgr.h" +#include "pxremap.h" + +#ifndef RT_OS_WINDOWS +#include <sys/types.h> +#include <sys/socket.h> +#include <stdlib.h> +#include <stdint.h> +#include <stdio.h> +#include <poll.h> + +#include <err.h> /* BSD'ism */ +#else +#include <stdlib.h> +#include <iprt/stdint.h> +#include <stdio.h> +#include "winpoll.h" +#endif + +#include "lwip/opt.h" + +#include "lwip/sys.h" +#include "lwip/tcpip.h" +#include "lwip/udp.h" + +struct pxudp { + /** + * Our poll manager handler. + */ + struct pollmgr_handler pmhdl; + + /** + * lwIP ("internal") side of the proxied connection. + */ + struct udp_pcb *pcb; + + /** + * Host ("external") side of the proxied connection. + */ + SOCKET sock; + + /** + * For some protocols (notably: DNS) we know we are getting just + * one reply, so we don't want the pcb and the socket to sit there + * waiting to be g/c'ed by timeout. This field counts request and + * replies for them. + */ + int count; + + /** + * Mailbox for inbound pbufs. + * + * XXX: since we have single producer and single consumer we can + * use lockless ringbuf like for pxtcp. + */ + sys_mbox_t inmbox; + + /** + * lwIP thread's strong reference to us. + */ + struct pollmgr_refptr *rp; + + /* + * We use static messages to void malloc/free overhead. 
+ */ + struct tcpip_msg msg_delete; /* delete pxudp */ + struct tcpip_msg msg_inbound; /* trigger send of inbound data */ +}; + + +static struct pxudp *pxudp_allocate(void); +static void pxudp_drain_inmbox(struct pxudp *); +static void pxudp_free(struct pxudp *); + +static struct udp_pcb *pxudp_pcb_dissociate(struct pxudp *); + +/* poll manager callbacks for pxudp related channels */ +static int pxudp_pmgr_chan_add(struct pollmgr_handler *, SOCKET, int); +static int pxudp_pmgr_chan_del(struct pollmgr_handler *, SOCKET, int); + +/* helper functions for sending/receiving pxudp over poll manager channels */ +static ssize_t pxudp_chan_send(enum pollmgr_slot_t, struct pxudp *); +static ssize_t pxudp_chan_send_weak(enum pollmgr_slot_t, struct pxudp *); +static struct pxudp *pxudp_chan_recv(struct pollmgr_handler *, SOCKET, int); +static struct pxudp *pxudp_chan_recv_strong(struct pollmgr_handler *, SOCKET, int); + +/* poll manager callbacks for individual sockets */ +static int pxudp_pmgr_pump(struct pollmgr_handler *, SOCKET, int); + +/* convenience function for poll manager callback */ +static int pxudp_schedule_delete(struct pxudp *); + +/* lwip thread callbacks called via proxy_lwip_post() */ +static void pxudp_pcb_delete_pxudp(void *); + +/* udp pcb callbacks &c */ +static void pxudp_pcb_accept(void *, struct udp_pcb *, struct pbuf *, ip_addr_t *, u16_t); +static void pxudp_pcb_recv(void *, struct udp_pcb *, struct pbuf *, ip_addr_t *, u16_t); +static void pxudp_pcb_forward_outbound(struct pxudp *, struct pbuf *, ip_addr_t *, u16_t); +static void pxudp_pcb_expired(struct pxudp *); +static void pxudp_pcb_write_inbound(void *); +static void pxudp_pcb_forward_inbound(struct pxudp *); + +/* poll manager handlers for pxudp channels */ +static struct pollmgr_handler pxudp_pmgr_chan_add_hdl; +static struct pollmgr_handler pxudp_pmgr_chan_del_hdl; + + +void +pxudp_init(void) +{ + /* + * Create channels. 
+ */ + pxudp_pmgr_chan_add_hdl.callback = pxudp_pmgr_chan_add; + pxudp_pmgr_chan_add_hdl.data = NULL; + pxudp_pmgr_chan_add_hdl.slot = -1; + pollmgr_add_chan(POLLMGR_CHAN_PXUDP_ADD, &pxudp_pmgr_chan_add_hdl); + + pxudp_pmgr_chan_del_hdl.callback = pxudp_pmgr_chan_del; + pxudp_pmgr_chan_del_hdl.data = NULL; + pxudp_pmgr_chan_del_hdl.slot = -1; + pollmgr_add_chan(POLLMGR_CHAN_PXUDP_DEL, &pxudp_pmgr_chan_del_hdl); + + udp_proxy_accept(pxudp_pcb_accept); +} + + +/** + * Syntactic sugar for sending pxudp pointer over poll manager + * channel. Used by lwip thread functions. + */ +static ssize_t +pxudp_chan_send(enum pollmgr_slot_t chan, struct pxudp *pxudp) +{ + return pollmgr_chan_send(chan, &pxudp, sizeof(pxudp)); +} + + +/** + * Syntactic sugar for sending weak reference to pxudp over poll + * manager channel. Used by lwip thread functions. + */ +static ssize_t +pxudp_chan_send_weak(enum pollmgr_slot_t chan, struct pxudp *pxudp) +{ + pollmgr_refptr_weak_ref(pxudp->rp); + return pollmgr_chan_send(chan, &pxudp->rp, sizeof(pxudp->rp)); +} + + +/** + * Counterpart of pxudp_chan_send(). + */ +static struct pxudp * +pxudp_chan_recv(struct pollmgr_handler *handler, SOCKET fd, int revents) +{ + struct pxudp *pxudp; + + pxudp = (struct pxudp *)pollmgr_chan_recv_ptr(handler, fd, revents); + return pxudp; +} + + +/** + * Counterpart of pxudp_chan_send_weak(). + */ +struct pxudp * +pxudp_chan_recv_strong(struct pollmgr_handler *handler, SOCKET fd, int revents) +{ + struct pollmgr_refptr *rp; + struct pollmgr_handler *base; + struct pxudp *pxudp; + + rp = (struct pollmgr_refptr *)pollmgr_chan_recv_ptr(handler, fd, revents); + base = (struct pollmgr_handler *)pollmgr_refptr_get(rp); + pxudp = (struct pxudp *)base; + + return pxudp; +} + + +/** + * POLLMGR_CHAN_PXUDP_ADD handler. + * + * Get new pxudp from lwip thread and start polling its socket. 
+ */ +static int +pxudp_pmgr_chan_add(struct pollmgr_handler *handler, SOCKET fd, int revents) +{ + struct pxudp *pxudp; + int status; + + pxudp = pxudp_chan_recv(handler, fd, revents); + DPRINTF(("pxudp_add: new pxudp %p; pcb %p\n", + (void *)pxudp, (void *)pxudp->pcb)); + + LWIP_ASSERT1(pxudp != NULL); + LWIP_ASSERT1(pxudp->pmhdl.callback != NULL); + LWIP_ASSERT1(pxudp->pmhdl.data = (void *)pxudp); + LWIP_ASSERT1(pxudp->pmhdl.slot < 0); + + + status = pollmgr_add(&pxudp->pmhdl, pxudp->sock, POLLIN); + if (status < 0) { + pxudp_schedule_delete(pxudp); + } + + return POLLIN; +} + + +/** + * POLLMGR_CHAN_PXUDP_DEL handler. + */ +static int +pxudp_pmgr_chan_del(struct pollmgr_handler *handler, SOCKET fd, int revents) +{ + struct pxudp *pxudp; + + pxudp = pxudp_chan_recv_strong(handler, fd, revents); + if (pxudp == NULL) { + return POLLIN; + } + + DPRINTF(("pxudp_del: pxudp %p; socket %d\n", (void *)pxudp, pxudp->sock)); + + pollmgr_del_slot(pxudp->pmhdl.slot); + + /* + * Go back to lwip thread to delete after any pending callbacks + * for unprocessed inbound traffic are drained. 
+ */ + pxudp_schedule_delete(pxudp); + + return POLLIN; +} + + +static struct pxudp * +pxudp_allocate(void) +{ + struct pxudp *pxudp; + err_t error; + + pxudp = (struct pxudp *)malloc(sizeof(*pxudp)); + if (pxudp == NULL) { + return NULL; + } + + pxudp->pmhdl.callback = NULL; + pxudp->pmhdl.data = (void *)pxudp; + pxudp->pmhdl.slot = -1; + + pxudp->pcb = NULL; + pxudp->sock = INVALID_SOCKET; + pxudp->count = 0; + + pxudp->rp = pollmgr_refptr_create(&pxudp->pmhdl); + if (pxudp->rp == NULL) { + free(pxudp); + return NULL; + } + + error = sys_mbox_new(&pxudp->inmbox, 16); + if (error != ERR_OK) { + pollmgr_refptr_unref(pxudp->rp); + free(pxudp); + return NULL; + } + +#define CALLBACK_MSG(MSG, FUNC) \ + do { \ + pxudp->MSG.type = TCPIP_MSG_CALLBACK_STATIC; \ + pxudp->MSG.sem = NULL; \ + pxudp->MSG.msg.cb.function = FUNC; \ + pxudp->MSG.msg.cb.ctx = (void *)pxudp; \ + } while (0) + + CALLBACK_MSG(msg_delete, pxudp_pcb_delete_pxudp); + CALLBACK_MSG(msg_inbound, pxudp_pcb_write_inbound); + + return pxudp; +} + + +static void +pxudp_drain_inmbox(struct pxudp *pxudp) +{ + void *ptr; + + if (!sys_mbox_valid(&pxudp->inmbox)) { + return; + } + + while (sys_mbox_tryfetch(&pxudp->inmbox, &ptr) != SYS_MBOX_EMPTY) { + struct pbuf *p = (struct pbuf *)ptr; + pbuf_free(p); + } + + sys_mbox_free(&pxudp->inmbox); + sys_mbox_set_invalid(&pxudp->inmbox); +} + + +static void +pxudp_free(struct pxudp *pxudp) +{ + pxudp_drain_inmbox(pxudp); + free(pxudp); +} + + +/** + * Dissociate pxudp and its udp_pcb. + * + * Unlike its TCP cousin returns the pcb since UDP pcbs need to be + * actively deleted, so save callers the trouble of saving a copy + * before calling us. 
+ */ +static struct udp_pcb * +pxudp_pcb_dissociate(struct pxudp *pxudp) +{ + struct udp_pcb *pcb; + + if (pxudp == NULL || pxudp->pcb == NULL) { + return NULL; + } + + pcb = pxudp->pcb; + + udp_recv(pxudp->pcb, NULL, NULL); + pxudp->pcb = NULL; + + return pcb; +} + + +/** + * Lwip thread callback invoked via pxudp::msg_delete + * + * Since we use static messages to communicate to the lwip thread, we + * cannot delete pxudp without making sure there are no unprocessed + * messages in the lwip thread mailbox. + * + * The easiest way to ensure that is to send this "delete" message as + * the last one and when it's processed we know there are no more and + * it's safe to delete pxudp. + * + * Channel callback should use pxudp_schedule_delete() convenience + * function defined below. + */ +static void +pxudp_pcb_delete_pxudp(void *arg) +{ + struct pxudp *pxudp = (struct pxudp *)arg; + struct udp_pcb *pcb; + + LWIP_ASSERT1(pxudp != NULL); + + if (pxudp->sock != INVALID_SOCKET) { + closesocket(pxudp->sock); + pxudp->sock = INVALID_SOCKET; + } + + pcb = pxudp_pcb_dissociate(pxudp); + if (pcb != NULL) { + udp_remove(pcb); + } + + pollmgr_refptr_unref(pxudp->rp); + pxudp_free(pxudp); +} + + +/** + * Poll manager callback should use this convenience wrapper to + * schedule pxudp deletion on the lwip thread and to deregister from + * the poll manager. + */ +static int +pxudp_schedule_delete(struct pxudp *pxudp) +{ + /* + * If pollmgr_refptr_get() is called by any channel before + * scheduled deletion happens, let them know we are gone. + */ + pxudp->pmhdl.slot = -1; + + /* + * Schedule deletion. Since poll manager thread may be pre-empted + * right after we send the message, the deletion may actually + * happen on the lwip thread before we return from this function, + * so it's not safe to refer to pxudp after this call. + */ + proxy_lwip_post(&pxudp->msg_delete); + + /* tell poll manager to deregister us */ + return -1; +} + + +/** + * New proxied UDP conversation created. 
+ * Global callback for udp_proxy_accept(). + */ +static void +pxudp_pcb_accept(void *arg, struct udp_pcb *newpcb, struct pbuf *p, + ip_addr_t *addr, u16_t port) +{ + struct pxudp *pxudp; + ipX_addr_t dst_addr; + int mapping; + int sdom; + SOCKET sock; + + LWIP_ASSERT1(newpcb != NULL); + LWIP_ASSERT1(p != NULL); + LWIP_UNUSED_ARG(arg); + + pxudp = pxudp_allocate(); + if (pxudp == NULL) { + DPRINTF(("pxudp_allocate: failed\n")); + udp_remove(newpcb); + pbuf_free(p); + return; + } + + sdom = PCB_ISIPV6(newpcb) ? PF_INET6 : PF_INET; + mapping = pxremap_outbound_ipX(PCB_ISIPV6(newpcb), &dst_addr, &newpcb->local_ip); + +#if 0 /* XXX: DNS IPv6->IPv4 remapping hack */ + if (mapping == PXREMAP_MAPPED + && newpcb->local_port == 53 + && PCB_ISIPV6(newpcb)) + { + /* + * "Remap" DNS over IPv6 to IPv4 since Ubuntu dnsmasq does not + * listen on IPv6. + */ + sdom = PF_INET; + ipX_addr_set_loopback(0, &dst_addr); + } +#endif /* DNS IPv6->IPv4 remapping hack */ + + sock = proxy_connected_socket(sdom, SOCK_DGRAM, + &dst_addr, newpcb->local_port); + if (sock == INVALID_SOCKET) { + udp_remove(newpcb); + pbuf_free(p); + return; + } + + pxudp->sock = sock; + pxudp->pcb = newpcb; + udp_recv(newpcb, pxudp_pcb_recv, pxudp); + + pxudp->pmhdl.callback = pxudp_pmgr_pump; + pxudp_chan_send(POLLMGR_CHAN_PXUDP_ADD, pxudp); + + /* dispatch directly instead of calling pxudp_pcb_recv() */ + pxudp_pcb_forward_outbound(pxudp, p, addr, port); +} + + +/** + * udp_recv() callback. 
+ */ +static void +pxudp_pcb_recv(void *arg, struct udp_pcb *pcb, struct pbuf *p, + ip_addr_t *addr, u16_t port) +{ + struct pxudp *pxudp = (struct pxudp *)arg; + + LWIP_ASSERT1(pxudp != NULL); + LWIP_ASSERT1(pcb == pxudp->pcb); + LWIP_UNUSED_ARG(pcb); + + if (p != NULL) { + pxudp_pcb_forward_outbound(pxudp, p, addr, port); + } + else { + pxudp_pcb_expired(pxudp); + } +} + + +static void +pxudp_pcb_forward_outbound(struct pxudp *pxudp, struct pbuf *p, + ip_addr_t *addr, u16_t port) +{ + LWIP_UNUSED_ARG(addr); + LWIP_UNUSED_ARG(port); + + if (pxudp->pcb->local_port == 53) { + ++pxudp->count; + } + + proxy_sendto(pxudp->sock, p, NULL, 0); + pbuf_free(p); +} + + +/** + * Proxy udp_pcbs are expired by timer, which is signaled by passing + * NULL pbuf to the udp_recv() callback. At that point the pcb is + * removed from the list of proxy udp pcbs so no new datagrams will be + * delivered. + */ +static void +pxudp_pcb_expired(struct pxudp *pxudp) +{ + struct udp_pcb *pcb; + + DPRINTF2(("%s: pxudp %p, pcb %p, sock %d: expired\n", + __func__, (void *)pxudp, (void *)pxudp->pcb, pxudp->sock)); + + pcb = pxudp_pcb_dissociate(pxudp); + if (pcb != NULL) { + udp_remove(pcb); + } + + pxudp_chan_send_weak(POLLMGR_CHAN_PXUDP_DEL, pxudp); +} + + +/** + */ +static int +pxudp_pmgr_pump(struct pollmgr_handler *handler, SOCKET fd, int revents) +{ + struct pxudp *pxudp; + struct pbuf *p; + ssize_t nread; + err_t error; + + pxudp = (struct pxudp *)handler->data; + LWIP_ASSERT1(handler == &pxudp->pmhdl); + LWIP_ASSERT1(fd == pxudp->sock); + LWIP_UNUSED_ARG(fd); + + + if (revents & ~(POLLIN|POLLERR)) { + DPRINTF(("%s: unexpected revents 0x%x\n", __func__, revents)); + return pxudp_schedule_delete(pxudp); + } + + /* + * XXX: AFAICS, there's no way to match the error with the + * outgoing datagram that triggered it, since we do non-blocking + * sends from lwip thread. 
+ */ + if (revents & POLLERR) { + int sockerr = -1; + socklen_t optlen = (socklen_t)sizeof(sockerr); + int status; + + status = getsockopt(pxudp->sock, SOL_SOCKET, + SO_ERROR, (char *)&sockerr, &optlen); + if (status < 0) { + DPRINTF(("%s: sock %d: SO_ERROR failed with errno %d\n", + __func__, pxudp->sock, errno)); + } + else { + DPRINTF(("%s: sock %d: errno %d\n", + __func__, pxudp->sock, sockerr)); + } + } + + if ((revents & POLLIN) == 0) { + return POLLIN; + } + + nread = recv(pxudp->sock, pollmgr_udpbuf, sizeof(pollmgr_udpbuf), 0); + if (nread == SOCKET_ERROR) { + perror(__func__); + return POLLIN; + } + + p = pbuf_alloc(PBUF_RAW, (u16_t)nread, PBUF_RAM); + if (p == NULL) { + DPRINTF(("%s: pbuf_alloc(%d) failed\n", __func__, (int)nread)); + return POLLIN; + } + + error = pbuf_take(p, pollmgr_udpbuf, (u16_t)nread); + if (error != ERR_OK) { + DPRINTF(("%s: pbuf_take(%d) failed\n", __func__, (int)nread)); + pbuf_free(p); + return POLLIN; + } + + error = sys_mbox_trypost(&pxudp->inmbox, p); + if (error != ERR_OK) { + pbuf_free(p); + return POLLIN; + } + + proxy_lwip_post(&pxudp->msg_inbound); + + return POLLIN; +} + + +/** + * Callback from poll manager to trigger sending to guest. + */ +static void +pxudp_pcb_write_inbound(void *ctx) +{ + struct pxudp *pxudp = (struct pxudp *)ctx; + LWIP_ASSERT1(pxudp != NULL); + + if (pxudp->pcb == NULL) { + return; + } + + pxudp_pcb_forward_inbound(pxudp); +} + + +static void +pxudp_pcb_forward_inbound(struct pxudp *pxudp) +{ + struct pbuf *p; + u32_t timo; + err_t error; + + if (!sys_mbox_valid(&pxudp->inmbox)) { + return; + } + + timo = sys_mbox_tryfetch(&pxudp->inmbox, (void **)&p); + if (timo == SYS_MBOX_EMPTY) { + return; + } + + error = udp_send(pxudp->pcb, p); + if (error != ERR_OK) { + DPRINTF(("%s: udp_send(pcb %p) err %d\n", + __func__, (void *)pxudp, error)); + } + + pbuf_free(p); + + /* + * If we enabled counting in pxudp_pcb_forward_outbound() check + * that we have (all) the reply(s). 
+ */ + if (pxudp->count > 0) { + --pxudp->count; + if (pxudp->count == 0) { + pxudp_pcb_expired(pxudp); + } + } +} diff --git a/src/VBox/NetworkServices/NAT/rtmon_bsd.c b/src/VBox/NetworkServices/NAT/rtmon_bsd.c new file mode 100644 index 00000000..52da3016 --- /dev/null +++ b/src/VBox/NetworkServices/NAT/rtmon_bsd.c @@ -0,0 +1,97 @@ +/* -*- indent-tabs-mode: nil; -*- */ +#include "proxy.h" + +#include <sys/types.h> +#include <sys/socket.h> + +#include <net/if_dl.h> +#include <net/route.h> + +#include <netinet/in.h> +#include <netinet/ip6.h> + +#include <errno.h> +#include <string.h> +#include <unistd.h> + + +/** + * Query IPv6 routing table - BSD routing sockets version. + * + * We don't actually monitor the routing socket for updates, and + * instead query the kernel each time. + * + * We take a shortcut and don't read the reply to our RTM_GET - if + * there's no default IPv6 route, write(2) will fail with ESRCH + * synchronously. In theory it may fail asynchronously and we should + * wait for the RTM_GET reply and check rt_msghdr::rtm_errno. + * + * KAME code in *BSD maintains internally a list of default routers + * that it learned from RAs, and installs only one of them into the + * routing table (actually, I'm not sure if BSD routing table can + * handle multiple routes to the same destination). One side-effect + * of this is that when manually configured route (e.g. teredo) is + * deleted, the system will lose its default route even when KAME IPv6 + * has default router(s) in its internal list. Next RA will force the + * update, though. + * + * Solaris does expose multiple routes in the routing table and + * replies to RTM_GET with "default default". 
+ */ +int +rtmon_get_defaults(void) +{ + int rtsock; + struct req { + struct rt_msghdr rtm; + struct sockaddr_in6 dst; + struct sockaddr_in6 mask; + struct sockaddr_dl ifp; + } req; + ssize_t nsent; + + rtsock = socket(PF_ROUTE, SOCK_RAW, AF_INET6); + if (rtsock < 0) { + DPRINTF0(("rtmon: failed to create routing socket\n")); + return -1; + } + + memset(&req, 0, sizeof(req)); + + req.rtm.rtm_type = RTM_GET; + req.rtm.rtm_version = RTM_VERSION; + req.rtm.rtm_msglen = sizeof(req); + req.rtm.rtm_seq = 0x12345; + + req.rtm.rtm_flags = RTF_UP; + req.rtm.rtm_addrs = RTA_DST | RTA_NETMASK | RTA_IFP; + + req.dst.sin6_family = AF_INET6; +#if HAVE_SA_LEN + req.dst.sin6_len = sizeof(req.dst); +#endif + + req.mask.sin6_family = AF_INET6; +#if HAVE_SA_LEN + req.mask.sin6_len = sizeof(req.mask); +#endif + + req.ifp.sdl_family = AF_LINK; +#if HAVE_SA_LEN + req.ifp.sdl_len = sizeof(req.ifp); +#endif + + nsent = write(rtsock, &req, req.rtm.rtm_msglen); + if (nsent < 0) { + if (errno == ESRCH) { + /* there's no default route */ + return 0; + } + else { + DPRINTF0(("rtmon: failed to send RTM_GET\n")); + return -1; + } + } + + return 1; +} diff --git a/src/VBox/NetworkServices/NAT/rtmon_linux.c b/src/VBox/NetworkServices/NAT/rtmon_linux.c new file mode 100644 index 00000000..ecbfe390 --- /dev/null +++ b/src/VBox/NetworkServices/NAT/rtmon_linux.c @@ -0,0 +1,230 @@ +/* -*- indent-tabs-mode: nil; -*- */ +#include "proxy.h" + +#include <sys/types.h> /* must come before linux/netlink */ +#include <sys/socket.h> + +#include <asm/types.h> +#include <linux/netlink.h> +#include <linux/rtnetlink.h> + +#include <errno.h> +#include <string.h> +#include <unistd.h> + + +static int rtmon_check_defaults(const void *buf, size_t len); + + +/** + * Read IPv6 routing table - Linux rtnetlink version. + * + * XXX: TODO: To avoid re-reading the table we should subscribe to + * updates by binding a monitoring NETLINK_ROUTE socket to + * sockaddr_nl::nl_groups = RTMGRP_IPV6_ROUTE. 
+ * + * But that will provide updates only. Documentation is scarce, but + * from what I've seen it seems that to get accurate routing info the + * monitoring socket needs to be created first, then full routing + * table requested (easier to do via spearate socket), then monitoring + * socket polled for input. The first update(s) of the monitoring + * socket may happen before full table is returned, so we can't just + * count the defaults, we need to keep track of their { oif, gw } to + * correctly ignore updates that are reported via monitoring socket, + * but that are already reflected in the full routing table returned + * in response to our request. + */ +int +rtmon_get_defaults(void) +{ + int rtsock; + ssize_t nsent, ssize; + int ndefrts; + + char *buf = NULL; + size_t bufsize; + + struct { + struct nlmsghdr nh; + struct rtmsg rtm; + char attrbuf[512]; + } rtreq; + + memset(&rtreq, 0, sizeof(rtreq)); + rtreq.nh.nlmsg_type = RTM_GETROUTE; + rtreq.nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP; + rtreq.rtm.rtm_family = AF_INET6; + rtreq.rtm.rtm_table = RT_TABLE_MAIN; + rtreq.rtm.rtm_protocol = RTPROT_UNSPEC; + + rtreq.nh.nlmsg_len = NLMSG_SPACE(sizeof(rtreq.rtm)); + + bufsize = 1024; + ssize = bufsize; + for (;;) { + char *newbuf; + int recverr; + + newbuf = (char *)realloc(buf, ssize); + if (newbuf == NULL) { + DPRINTF0(("rtmon: failed to %sallocate buffer\n", + buf == NULL ? 
"" : "re")); + free(buf); + return -1; + } + + buf = newbuf; + bufsize = ssize; + + /* it's easier to reopen than to flush */ + rtsock = socket(PF_NETLINK, SOCK_RAW, NETLINK_ROUTE); + if (rtsock < 0) { + DPRINTF0(("rtmon: failed to create netlink socket: %s", strerror(errno))); + free(buf); + return -1; + } + + nsent = send(rtsock, &rtreq, rtreq.nh.nlmsg_len, 0); + if (nsent < 0) { + DPRINTF0(("rtmon: RTM_GETROUTE failed: %s", strerror(errno))); + close (rtsock); + free(buf); + return -1; + } + + ssize = recv(rtsock, buf, bufsize, MSG_TRUNC); + recverr = errno; + close (rtsock); + + if (ssize < 0) { + DPRINTF(("rtmon: failed to read RTM_GETROUTE response: %s", + strerror(recverr))); + free(buf); + return -1; + } + + if ((size_t)ssize <= bufsize) { + DPRINTF2(("rtmon: RTM_GETROUTE: %lu bytes\n", + (unsigned long)ssize)); + break; + } + + DPRINTF2(("rtmon: RTM_GETROUTE: truncated %lu to %lu bytes, retrying\n", + (unsigned long)ssize, (unsigned long)bufsize)); + /* try again with larger buffer */ + } + + ndefrts = rtmon_check_defaults(buf, (size_t)ssize); + free(buf); + + if (ndefrts == 0) { + DPRINTF(("rtmon: no IPv6 default routes found\n")); + } + else { + DPRINTF(("rtmon: %d IPv6 default route%s found\n", + ndefrts, + ndefrts == 1 || ndefrts == -1 ? "" : "s")); + } + + return ndefrts; +} + + +/** + * Scan netlink message in the buffer for IPv6 default route changes. 
+ */ +static int +rtmon_check_defaults(const void *buf, size_t len) +{ + struct nlmsghdr *nh; + int dfltdiff = 0; + + for (nh = (struct nlmsghdr *)buf; + NLMSG_OK(nh, len); + nh = NLMSG_NEXT(nh, len)) + { + struct rtmsg *rtm; + struct rtattr *rta; + int attrlen; + int delta = 0; + const void *gwbuf; + size_t gwlen; + int oif; + + DPRINTF2(("nlmsg type %d flags 0x%x\n", + nh->nlmsg_seq, nh->nlmsg_type, nh->nlmsg_flags)); + + if (nh->nlmsg_type == NLMSG_DONE) { + break; + } + + if (nh->nlmsg_type == NLMSG_ERROR) { + struct nlmsgerr *ne = (struct nlmsgerr *)NLMSG_DATA(nh); + DPRINTF2(("> error %d\n", ne->error)); + LWIP_UNUSED_ARG(ne); + break; + } + + if (nh->nlmsg_type < RTM_BASE || RTM_MAX <= nh->nlmsg_type) { + /* shouldn't happen */ + DPRINTF2(("> not an RTM message!\n")); + continue; + } + + + rtm = (struct rtmsg *)NLMSG_DATA(nh); + attrlen = RTM_PAYLOAD(nh); + + if (nh->nlmsg_type == RTM_NEWROUTE) { + delta = +1; + } + else if (nh->nlmsg_type == RTM_DELROUTE) { + delta = -1; + } + else { + /* shouldn't happen */ + continue; + } + + /* + * Is this an IPv6 default route in the main table? (Local + * table always has ::/0 reject route, hence the last check). 
+ */ + if (rtm->rtm_family == AF_INET6 /* should always be true */ + && rtm->rtm_dst_len == 0 + && rtm->rtm_table == RT_TABLE_MAIN) + { + dfltdiff += delta; + } + else { + /* some other route change */ + continue; + } + + + gwbuf = NULL; + gwlen = 0; + oif = -1; + + for (rta = RTM_RTA(rtm); + RTA_OK(rta, attrlen); + rta = RTA_NEXT(rta, attrlen)) + { + if (rta->rta_type == RTA_GATEWAY) { + gwbuf = RTA_DATA(rta); + gwlen = RTA_PAYLOAD(rta); + } + else if (rta->rta_type == RTA_OIF) { + /* assert RTA_PAYLOAD(rta) == 4 */ + memcpy(&oif, RTA_DATA(rta), sizeof(oif)); + } + } + + /* XXX: TODO: note that { oif, gw } was added/removed */ + LWIP_UNUSED_ARG(gwbuf); + LWIP_UNUSED_ARG(gwlen); + LWIP_UNUSED_ARG(oif); + } + + return dfltdiff; +} diff --git a/src/VBox/NetworkServices/NAT/rtmon_win.c b/src/VBox/NetworkServices/NAT/rtmon_win.c new file mode 100644 index 00000000..496032f0 --- /dev/null +++ b/src/VBox/NetworkServices/NAT/rtmon_win.c @@ -0,0 +1,4 @@ +int +rtmon_get_defaults(void) { + return 0; +} diff --git a/src/VBox/NetworkServices/NAT/tftp.h b/src/VBox/NetworkServices/NAT/tftp.h new file mode 100644 index 00000000..a9d9a7e8 --- /dev/null +++ b/src/VBox/NetworkServices/NAT/tftp.h @@ -0,0 +1,30 @@ +/* -*- indent-tabs-mode: nil; -*- */ +#ifndef _TFTP_H_ +#define _TFTP_H_ + +#define TFTP_SERVER_PORT 69 + +/* opcodes */ +#define TFTP_RRQ 1 +#define TFTP_WRQ 2 +#define TFTP_DATA 3 +#define TFTP_ACK 4 +#define TFTP_ERROR 5 +/* RFC 2347 */ +#define TFTP_OACK 6 + + +/* error codes */ +#define TFTP_EUNDEF 0 /* Not defined, see error message (if any). */ +#define TFTP_ENOENT 1 /* File not found. */ +#define TFTP_EACCESS 2 /* Access violation. */ +#define TFTP_EFBIG 3 /* Disk full or allocation exceeded. */ +#define TFTP_ENOSYS 4 /* Illegal TFTP operation. */ +#define TFTP_ESRCH 5 /* Unknown transfer ID. */ +#define TFTP_EEXIST 6 /* File already exists. */ +#define TFTP_EUSER 7 /* No such user. */ +/* RFC 2347 */ +#define TFTP_EONAK 8 /* Option refused. 
*/ + + +#endif /* _TFTP_H_ */ diff --git a/src/VBox/NetworkServices/NAT/winpoll.h b/src/VBox/NetworkServices/NAT/winpoll.h new file mode 100644 index 00000000..2c509ede --- /dev/null +++ b/src/VBox/NetworkServices/NAT/winpoll.h @@ -0,0 +1,33 @@ +#ifndef _WINPOLL_H_ +#define _WINPOLL_H_ +# include <iprt/cdefs.h> +/** + * WinSock2 has definition for POLL* and pollfd, but it defined for _WIN32_WINNT > 0x0600 + * and used in WSAPoll, which has very unclear history. + */ +# if(_WIN32_WINNT < 0x0600) +# define POLLRDNORM 0x0100 +# define POLLRDBAND 0x0200 +# define POLLIN (POLLRDNORM | POLLRDBAND) +# define POLLPRI 0x0400 + +# define POLLWRNORM 0x0010 +# define POLLOUT (POLLWRNORM) +# define POLLWRBAND 0x0020 + +# define POLLERR 0x0001 +# define POLLHUP 0x0002 +# define POLLNVAL 0x0004 + +struct pollfd { + + SOCKET fd; + SHORT events; + SHORT revents; + +}; +#endif +RT_C_DECLS_BEGIN +int RTWinPoll(struct pollfd *pFds, unsigned int nfds, int timeout, int *pNready); +RT_C_DECLS_END +#endif diff --git a/src/VBox/NetworkServices/NAT/winutils.h b/src/VBox/NetworkServices/NAT/winutils.h new file mode 100644 index 00000000..281c0c66 --- /dev/null +++ b/src/VBox/NetworkServices/NAT/winutils.h @@ -0,0 +1,165 @@ +#ifndef __WINUTILS_H_ +# define __WINUTILS_H_ + +# ifdef RT_OS_WINDOWS +# include <iprt/cdefs.h> +# include <WinSock2.h> +# include <ws2tcpip.h> +# include <mswsock.h> +# include <Windows.h> +# include <iprt/err.h> +# include <iprt/net.h> +# include <iprt/log.h> +/** + * Inclusion of lwip/def.h was added here to avoid conflict of definitions + * of hton-family functions in LWIP and windock's headers. + */ +# include <lwip/def.h> + +# ifndef PF_LOCAL +# define PF_LOCAL AF_INET +# endif + +# define warn(...) DPRINTF2((__VA_ARGS__)) +# define warnx warn +# ifdef DEBUG +# define err(code,...) do { \ + AssertMsgFailed((__VA_ARGS__)); \ + }while(0) +#else +# define err(code,...) 
do { \ + DPRINTF0((__VA_ARGS__)); \ + ExitProcess(code); \ + }while(0) +#endif +# define errx err +# define __func__ __FUNCTION__ +# define __attribute__(x) /* IGNORE */ + +/* + * XXX: inet_ntop() is only available starting from Vista. + */ +DECLINLINE(PCSTR) +inet_ntop(INT Family, PVOID pAddr, PSTR pStringBuf, size_t StringBufSize) +{ + DWORD size = (DWORD)StringBufSize; + int status; + + if (Family == AF_INET) + { + struct sockaddr_in sin; + memset(&sin, 0, sizeof(sin)); + sin.sin_family = AF_INET; + memcpy(&sin.sin_addr, pAddr, sizeof(sin.sin_addr)); + sin.sin_port = 0; + status = WSAAddressToStringA((LPSOCKADDR)&sin, sizeof(sin), NULL, + pStringBuf, &size); + } + else if (Family == AF_INET6) + { + struct sockaddr_in6 sin6; + memset(&sin6, 0, sizeof(sin6)); + sin6.sin6_family = AF_INET6; + memcpy(&sin6.sin6_addr, pAddr, sizeof(sin6.sin6_addr)); + sin6.sin6_port = 0; + status = WSAAddressToStringA((LPSOCKADDR)&sin6, sizeof(sin6), NULL, + pStringBuf, &size); + } + else + { + WSASetLastError(WSAEAFNOSUPPORT); + return NULL; + } + + if (status == SOCKET_ERROR) + { + return NULL; + } + + return pStringBuf; +} + + +/** + * tftpd emulation we're using POSIX operations which needs "DOS errno". see proxy_tftpd.c + */ +# ifndef _USE_WINSTD_ERRNO +/** + * http://msdn.microsoft.com/en-us/library/windows/desktop/ms737828(v=vs.85).aspx + * "Error Codes - errno, h_errno and WSAGetLastError" says "Error codes set by Windows Sockets are + * not made available through the errno variable." + */ +# include <errno.h> +# ifdef errno +# undef errno +# endif +# define errno (WSAGetLastError()) +# endif +/* Missing errno codes */ + +/** + * "Windows Sockets Error Codes" obtained with WSAGetLastError(). 
+ * http://msdn.microsoft.com/en-us/library/windows/desktop/ms740668(v=vs.85).aspx + */ +# undef EMSGSIZE +# define EMSGSIZE WSAEMSGSIZE +# undef ENETDOWN +# define ENETDOWN WSAENETDOWN +# undef ENETUNREACH +# define ENETUNREACH WSAENETUNREACH +# undef EHOSTDOWN +# define EHOSTDOWN WSAEHOSTDOWN +# undef EHOSTUNREACH +# define EHOSTUNREACH WSAEHOSTUNREACH + +/** + * parameters to shutdown (2) with Winsock2 + * http://msdn.microsoft.com/en-us/library/windows/desktop/ms740481(v=vs.85).aspx + */ +# define SHUT_RD SD_RECEIVE +# define SHUT_WR SD_SEND +# define SHUT_RDWR SD_BOTH + +typedef ULONG nfds_t; + +typedef WSABUF IOVEC; + +# define IOVEC_GET_BASE(iov) ((iov).buf) +# define IOVEC_SET_BASE(iov, b) ((iov).buf = (b)) + +# define IOVEC_GET_LEN(iov) ((iov).len) +# define IOVEC_SET_LEN(iov, l) ((iov).len = (ULONG)(l)) + +#if _WIN32_WINNT < 0x0600 +/* otherwise defined the other way around in ws2def.h */ +#define cmsghdr _WSACMSGHDR + +#undef CMSG_DATA /* wincrypt.h can byte my shiny metal #undef */ +#define CMSG_DATA WSA_CMSG_DATA +#define CMSG_LEN WSA_CMSG_LEN +#define CMSG_SPACE WSA_CMSG_SPACE + +#define CMSG_FIRSTHDR WSA_CMSG_FIRSTHDR +#define CMSG_NXTHDR WSA_CMSG_NXTHDR +#endif /* _WIN32_WINNT < 0x0600 - provide unglified CMSG names */ + +RT_C_DECLS_BEGIN +int RTWinSocketPair(int domain, int type, int protocol, SOCKET socket_vector[2]); +RT_C_DECLS_END + +# else /* !RT_OS_WINDOWS */ +# define ioctlsocket ioctl +# define closesocket close +# define SOCKET int +# define INVALID_SOCKET (-1) +# define SOCKET_ERROR (-1) + +typedef struct iovec IOVEC; + +# define IOVEC_GET_BASE(iov) ((iov).iov_base) +# define IOVEC_SET_BASE(iov, b) ((iov).iov_base = (b)) + +# define IOVEC_GET_LEN(iov) ((iov).iov_len) +# define IOVEC_SET_LEN(iov, l) ((iov).iov_len = (l)) +# endif +#endif diff --git a/src/VBox/NetworkServices/NetLib/ComHostUtils.cpp b/src/VBox/NetworkServices/NetLib/ComHostUtils.cpp new file mode 100644 index 00000000..33452364 --- /dev/null +++ 
b/src/VBox/NetworkServices/NetLib/ComHostUtils.cpp @@ -0,0 +1,215 @@ +/* $Id: ComHostUtils.cpp $ */ +/** @file + * ComHostUtils.cpp + */ + +/* + * Copyright (C) 2013 Oracle Corporation + * + * This file is part of VirtualBox Open Source Edition (OSE), as + * available from http://www.virtualbox.org. This file is free software; + * you can redistribute it and/or modify it under the terms of the GNU + * General Public License (GPL) as published by the Free Software + * Foundation, in version 2 as it comes in the "COPYING" file of the + * VirtualBox OSE distribution. VirtualBox OSE is distributed in the + * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind. + */ + +/******************************************************************************* +* Header Files * +*******************************************************************************/ +#ifdef RT_OS_WINDOWS +# define VBOX_COM_OUTOFPROC_MODULE +#endif +#include <VBox/com/com.h> +#include <VBox/com/listeners.h> +#include <VBox/com/string.h> +#include <VBox/com/Guid.h> +#include <VBox/com/array.h> +#include <VBox/com/ErrorInfo.h> +#include <VBox/com/errorprint.h> +#include <VBox/com/EventQueue.h> +#include <VBox/com/VirtualBox.h> + +#include <iprt/alloca.h> +#include <iprt/buildconfig.h> +#include <iprt/err.h> +#include <iprt/net.h> /* must come before getopt */ +#include <iprt/getopt.h> +#include <iprt/initterm.h> +#include <iprt/message.h> +#include <iprt/param.h> +#include <iprt/path.h> +#include <iprt/stream.h> +#include <iprt/time.h> +#include <iprt/string.h> + + +#include "../NetLib/VBoxNetLib.h" +#include "../NetLib/shared_ptr.h" + +#include <vector> +#include <list> +#include <string> +#include <map> + +#include "../NetLib/VBoxNetBaseService.h" + +#ifdef RT_OS_WINDOWS /* WinMain */ +# include <Windows.h> +# include <stdlib.h> +# ifdef INET_ADDRSTRLEN +/* On Windows INET_ADDRSTRLEN defined as 22 Ws2ipdef.h, because it include port number */ +# undef INET_ADDRSTRLEN +# endif +# define 
INET_ADDRSTRLEN 16 +#else +# include <netinet/in.h> +#endif + +#include "utils.h" + + +VBOX_LISTENER_DECLARE(NATNetworkListenerImpl) + + +
/**
 * Rebuild @a mapping from the NAT network's LocalMappings strings.
 *
 * Each string is split at the first '='.  The text before it is
 * parsed as an IPv4 address and the text after it as an unsigned
 * offset.  Entries without '=', with an over-long or unparsable
 * address part, or with an offset of 0 are skipped.
 *
 * @returns VINF_SUCCESS, or VERR_NOT_FOUND when the attribute cannot
 *          be read or the list is empty.
 */
int localMappings(const ComNatPtr& nat, AddressToOffsetMapping& mapping)
{
    mapping.clear();

    ComBstrArray strs;
    int cEntries = 0;
    HRESULT hrc = nat->COMGETTER(LocalMappings)(ComSafeArrayAsOutParam(strs));
    if (   !SUCCEEDED(hrc)
        || !(cEntries = strs.size()))
        return VERR_NOT_FOUND;

    for (int idx = 0; idx < cEntries; ++idx)
    {
        com::Utf8Str strEntry(strs[idx]);
        const char *pszEntry = strEntry.c_str();

        /* zero-filled, so the copied prefix is always terminated */
        char szAddr[17];
        RT_ZERO(szAddr);

        char *pszEq = RTStrStr(pszEntry, "=");
        if (!pszEq)
            continue;
        if (!((pszEq - pszEntry) <= INET_ADDRSTRLEN))
            continue;

        memcpy(szAddr, pszEntry, (pszEq - pszEntry));

        RTNETADDRIPV4 ip4addr;
        int rc = RTNetStrToIPv4Addr(szAddr, &ip4addr);
        if (!RT_SUCCESS(rc))
            continue;

        uint32_t u32Off = RTStrToUInt32(pszEq + 1);
        if (u32Off == 0)
            continue;

        mapping.insert(AddressToOffsetMapping::value_type(ip4addr, u32Off));
    }

    return VINF_SUCCESS;
}

/**
 * @note: const dropped here, because of map<K,V>::operator[] which isn't const, map<K,V>::at() has const
 * variant but it's C++11.
+ */ +int hostDnsServers(const ComHostPtr& host, const RTNETADDRIPV4& networkid, + /*const*/ AddressToOffsetMapping& mapping, AddressList& servers) +{ + servers.clear(); + + ComBstrArray strs; + if (SUCCEEDED(host->COMGETTER(NameServers)(ComSafeArrayAsOutParam(strs)))) + { + RTNETADDRIPV4 addr; + int rc; + + for (unsigned int i = 0; i < strs.size(); ++i) + { + rc = RTNetStrToIPv4Addr(com::Utf8Str(strs[i]).c_str(), &addr); + if (RT_SUCCESS(rc)) + { + if (addr.au8[0] == 127) + { + /* XXX: here we want map<K,V>::at(const K& k) const */ + if (mapping[addr] != 0) + { + addr.u = RT_H2N_U32(RT_N2H_U32(networkid.u) + + mapping[addr]); + } + else + continue; /* XXX: Warning here (local mapping wasn't registered) */ + } + + servers.push_back(addr); + } + } + } + else + return VERR_NOT_FOUND; + + return VINF_SUCCESS; +} + + +int hostDnsSearchList(const ComHostPtr& host, std::vector<std::string>& strings) +{ + strings.clear(); + + ComBstrArray strs; + if (SUCCEEDED(host->COMGETTER(SearchStrings)(ComSafeArrayAsOutParam(strs)))) + { + for (unsigned int i = 0; i < strs.size(); ++i) + { + strings.push_back(com::Utf8Str(strs[i]).c_str()); + } + } + else + return VERR_NOT_FOUND; + + return VINF_SUCCESS; +} + + +int hostDnsDomain(const ComHostPtr& host, std::string& domainStr) +{ + com::Bstr domain; + if (SUCCEEDED(host->COMGETTER(DomainName)(domain.asOutParam()))) + { + domainStr = com::Utf8Str(domain).c_str(); + return VINF_SUCCESS; + } + + return VERR_NOT_FOUND; +} + + +int createNatListener(ComNatListenerPtr& listener, const ComVirtualBoxPtr& vboxptr, + NATNetworkEventAdapter *adapter, /* const */ ComEventTypeArray& events) +{ + ComObjPtr<NATNetworkListenerImpl> obj; + HRESULT hrc = obj.createObject(); + AssertComRCReturn(hrc, VERR_INTERNAL_ERROR); + + hrc = obj->init(new NATNetworkListener(), adapter); + AssertComRCReturn(hrc, VERR_INTERNAL_ERROR); + + ComPtr<IEventSource> esVBox; + hrc = vboxptr->COMGETTER(EventSource)(esVBox.asOutParam()); + AssertComRCReturn(hrc, 
VERR_INTERNAL_ERROR); + + listener = obj; + + hrc = esVBox->RegisterListener(listener, ComSafeArrayAsInParam(events), true); + AssertComRCReturn(hrc, VERR_INTERNAL_ERROR); + + return VINF_SUCCESS; +} diff --git a/src/VBox/NetworkServices/NetLib/VBoxNetARP.cpp b/src/VBox/NetworkServices/NetLib/VBoxNetARP.cpp index ec5ed275..9cced5d2 100644 --- a/src/VBox/NetworkServices/NetLib/VBoxNetARP.cpp +++ b/src/VBox/NetworkServices/NetLib/VBoxNetARP.cpp @@ -4,7 +4,7 @@ */ /* - * Copyright (C) 2009 Oracle Corporation + * Copyright (C) 2009-2010 Oracle Corporation * * This file is part of VirtualBox Open Source Edition (OSE), as * available from http://www.virtualbox.org. This file is free software; @@ -43,7 +43,7 @@ bool VBoxNetArpHandleIt(PSUPDRVSESSION pSession, INTNETIFHANDLE hIf, PINTNETBUF */ PCINTNETHDR pHdr = IntNetRingGetNextFrameToRead(&pBuf->Recv); if ( !pHdr - || pHdr->u16Type != INTNETHDR_TYPE_FRAME) + || pHdr->u8Type != INTNETHDR_TYPE_FRAME) return false; size_t cbFrame = pHdr->cbFrame; diff --git a/src/VBox/NetworkServices/NetLib/VBoxNetBaseService.cpp b/src/VBox/NetworkServices/NetLib/VBoxNetBaseService.cpp index 435a1cb8..8e0cfd27 100644 --- a/src/VBox/NetworkServices/NetLib/VBoxNetBaseService.cpp +++ b/src/VBox/NetworkServices/NetLib/VBoxNetBaseService.cpp @@ -21,6 +21,16 @@ *******************************************************************************/ #define LOG_GROUP LOG_GROUP_NET_SERVICE +#include <VBox/com/com.h> +#include <VBox/com/listeners.h> +#include <VBox/com/string.h> +#include <VBox/com/Guid.h> +#include <VBox/com/array.h> +#include <VBox/com/ErrorInfo.h> +#include <VBox/com/errorprint.h> +#include <VBox/com/VirtualBox.h> +#include <VBox/com/NativeEventQueue.h> + #include <iprt/alloca.h> #include <iprt/buildconfig.h> #include <iprt/err.h> @@ -29,19 +39,24 @@ #include <iprt/initterm.h> #include <iprt/param.h> #include <iprt/path.h> +#include <iprt/process.h> #include <iprt/stream.h> #include <iprt/string.h> #include <iprt/time.h> +#include 
<iprt/thread.h> #include <iprt/mem.h> +#include <iprt/message.h> #include <VBox/sup.h> #include <VBox/intnet.h> +#include <VBox/intnetinline.h> #include <VBox/vmm/vmm.h> #include <VBox/version.h> #include <vector> #include <string> +#include <VBox/err.h> #include <VBox/log.h> #include "VBoxNetLib.h" @@ -56,6 +71,65 @@ /******************************************************************************* * Structures and Typedefs * *******************************************************************************/ +struct VBoxNetBaseService::Data +{ + Data(const std::string& aName, const std::string& aNetworkName): + m_Name(aName), + m_Network(aNetworkName), + m_enmTrunkType(kIntNetTrunkType_WhateverNone), + m_pSession(NIL_RTR0PTR), + m_cbSendBuf(128 * _1K), + m_cbRecvBuf(256 * _1K), + m_hIf(INTNET_HANDLE_INVALID), + m_pIfBuf(NULL), + m_cVerbosity(0), + m_fNeedMain(false), + m_EventQ(NULL), + m_hThrRecv(NIL_RTTHREAD), + fShutdown(false) + { + int rc = RTCritSectInit(&m_csThis); + AssertRC(rc); + }; + + std::string m_Name; + std::string m_Network; + std::string m_TrunkName; + INTNETTRUNKTYPE m_enmTrunkType; + + RTMAC m_MacAddress; + RTNETADDRIPV4 m_Ipv4Address; + RTNETADDRIPV4 m_Ipv4Netmask; + + PSUPDRVSESSION m_pSession; + uint32_t m_cbSendBuf; + uint32_t m_cbRecvBuf; + INTNETIFHANDLE m_hIf; /**< The handle to the network interface. */ + PINTNETBUF m_pIfBuf; /**< Interface buffer. 
*/ + + std::vector<PRTGETOPTDEF> m_vecOptionDefs; + + int32_t m_cVerbosity; + + /* cs for syncing */ + RTCRITSECT m_csThis; + + /* Controls whether service will connect SVC for runtime needs */ + bool m_fNeedMain; + /* Event Queue */ + com::NativeEventQueue *m_EventQ; + + /** receiving thread, used only if main is used */ + RTTHREAD m_hThrRecv; + + bool fShutdown; + static int recvLoop(RTTHREAD, void *); +}; + +/******************************************************************************* +* Global Variables * +*******************************************************************************/ +/* Commonly used options for network configuration */ static RTGETOPTDEF g_aGetOptDef[] = { { "--name", 'N', RTGETOPT_REQ_STRING }, @@ -64,50 +138,104 @@ static RTGETOPTDEF g_aGetOptDef[] = { "--trunk-type", 'T', RTGETOPT_REQ_STRING }, { "--mac-address", 'a', RTGETOPT_REQ_MACADDR }, { "--ip-address", 'i', RTGETOPT_REQ_IPV4ADDR }, + { "--netmask", 'm', RTGETOPT_REQ_IPV4ADDR }, { "--verbose", 'v', RTGETOPT_REQ_NOTHING }, + { "--need-main", 'M', RTGETOPT_REQ_BOOL }, }; -VBoxNetBaseService::VBoxNetBaseService() + + +int VBoxNetBaseService::Data::recvLoop(RTTHREAD, void *pvUser) { + VBoxNetBaseService *pThis = static_cast<VBoxNetBaseService *>(pvUser); + + HRESULT hrc = com::Initialize(); + AssertComRCReturn(hrc, VERR_INTERNAL_ERROR); + + pThis->doReceiveLoop(); + + return VINF_SUCCESS; } + + +VBoxNetBaseService::VBoxNetBaseService(const std::string& aName, const std::string& aNetworkName):m(NULL) +{ + m = new VBoxNetBaseService::Data(aName, aNetworkName); + + for(unsigned int i = 0; i < RT_ELEMENTS(g_aGetOptDef); ++i) + m->m_vecOptionDefs.push_back(&g_aGetOptDef[i]); +} + + VBoxNetBaseService::~VBoxNetBaseService() { /* * Close the interface connection. 
*/ - if (m_hIf != INTNET_HANDLE_INVALID) + if (m != NULL) { - INTNETIFCLOSEREQ CloseReq; - CloseReq.Hdr.u32Magic = SUPVMMR0REQHDR_MAGIC; - CloseReq.Hdr.cbReq = sizeof(CloseReq); - CloseReq.pSession = m_pSession; - CloseReq.hIf = m_hIf; - m_hIf = INTNET_HANDLE_INVALID; - int rc = SUPR3CallVMMR0Ex(NIL_RTR0PTR, NIL_RTCPUID, VMMR0_DO_INTNET_IF_CLOSE, 0, &CloseReq.Hdr); - AssertRC(rc); - } + shutdown(); + if (m->m_hIf != INTNET_HANDLE_INVALID) + { + INTNETIFCLOSEREQ CloseReq; + CloseReq.Hdr.u32Magic = SUPVMMR0REQHDR_MAGIC; + CloseReq.Hdr.cbReq = sizeof(CloseReq); + CloseReq.pSession = m->m_pSession; + CloseReq.hIf = m->m_hIf; + m->m_hIf = INTNET_HANDLE_INVALID; + int rc = SUPR3CallVMMR0Ex(NIL_RTR0PTR, NIL_RTCPUID, VMMR0_DO_INTNET_IF_CLOSE, 0, &CloseReq.Hdr); + AssertRC(rc); + } - if (m_pSession) - { - SUPR3Term(false /*fForced*/); - m_pSession = NIL_RTR0PTR; + if (m->m_pSession != NIL_RTR0PTR) + { + SUPR3Term(false /*fForced*/); + m->m_pSession = NIL_RTR0PTR; + } + + RTCritSectDelete(&m->m_csThis); + + delete m; + m = NULL; } } + int VBoxNetBaseService::init() { - /* numbers from DrvIntNet */ - m_cbSendBuf = 36 * _1K; - m_cbRecvBuf = 218 * _1K; - m_hIf = INTNET_HANDLE_INVALID; - m_pIfBuf = NULL; + if (isMainNeeded()) + { + HRESULT hrc = com::Initialize(); + AssertComRCReturn(hrc, VERR_INTERNAL_ERROR); + + hrc = virtualbox.createLocalObject(CLSID_VirtualBox); + AssertComRCReturn(hrc, VERR_INTERNAL_ERROR); + } - m_cVerbosity = 0; - m_Name = "VBoxNetNAT"; - m_Network = "intnet"; - for(unsigned int i = 0; i < RT_ELEMENTS(g_aGetOptDef); ++i) - m_vecOptionDefs.push_back(&g_aGetOptDef[i]); return VINF_SUCCESS; } + + +bool VBoxNetBaseService::isMainNeeded() const +{ + return m->m_fNeedMain; +} + + +int VBoxNetBaseService::run() +{ + /** + * If child class need Main we start receving thread which calls doReceiveLoop and enter to event polling loop + * and for the rest clients we do receiving on the current (main) thread. 
+ */ + if (isMainNeeded()) + return startReceiveThreadAndEnterEventLoop(); + else + { + doReceiveLoop(); + return VINF_SUCCESS; + } +} + /** * Parse the arguments. * @@ -121,8 +249,12 @@ int VBoxNetBaseService::parseArgs(int argc, char **argv) RTGETOPTSTATE State; PRTGETOPTDEF paOptionArray = getOptionsPtr(); - int rc = RTGetOptInit(&State, argc, argv, paOptionArray, m_vecOptionDefs.size(), 0, 0 /*fFlags*/); + int rc = RTGetOptInit(&State, argc, argv, paOptionArray, m->m_vecOptionDefs.size(), 0, 0 /*fFlags*/); AssertRCReturn(rc, 49); +#if 0 + /* default initialization */ + m_enmTrunkType = kIntNetTrunkType_WhateverNone; +#endif Log2(("BaseService: parseArgs enter\n")); for (;;) @@ -133,58 +265,74 @@ int VBoxNetBaseService::parseArgs(int argc, char **argv) break; switch (rc) { - case 'N': - m_Name = Val.psz; + case 'N': // --name + m->m_Name = Val.psz; break; - case 'n': - m_Network = Val.psz; + + case 'n': // --network + m->m_Network = Val.psz; break; - case 't': - m_TrunkName = Val.psz; + + case 't': //--trunk-name + m->m_TrunkName = Val.psz; break; - case 'T': + + case 'T': //--trunk-type if (!strcmp(Val.psz, "none")) - m_enmTrunkType = kIntNetTrunkType_None; + m->m_enmTrunkType = kIntNetTrunkType_None; else if (!strcmp(Val.psz, "whatever")) - m_enmTrunkType = kIntNetTrunkType_WhateverNone; + m->m_enmTrunkType = kIntNetTrunkType_WhateverNone; else if (!strcmp(Val.psz, "netflt")) - m_enmTrunkType = kIntNetTrunkType_NetFlt; + m->m_enmTrunkType = kIntNetTrunkType_NetFlt; else if (!strcmp(Val.psz, "netadp")) - m_enmTrunkType = kIntNetTrunkType_NetAdp; + m->m_enmTrunkType = kIntNetTrunkType_NetAdp; else if (!strcmp(Val.psz, "srvnat")) - m_enmTrunkType = kIntNetTrunkType_SrvNat; + m->m_enmTrunkType = kIntNetTrunkType_SrvNat; else { RTStrmPrintf(g_pStdErr, "Invalid trunk type '%s'\n", Val.psz); return 1; } break; - case 'a': - m_MacAddress = Val.MacAddr; + + case 'a': // --mac-address + m->m_MacAddress = Val.MacAddr; break; - case 'i': - m_Ipv4Address = Val.IPv4Addr; + 
+ case 'i': // --ip-address + m->m_Ipv4Address = Val.IPv4Addr; break; - case 'v': - m_cVerbosity++; + case 'm': // --netmask + m->m_Ipv4Netmask = Val.IPv4Addr; break; - case 'V': + case 'v': // --verbose + m->m_cVerbosity++; + break; + + case 'V': // --version (missed) RTPrintf("%sr%u\n", RTBldCfgVersion(), RTBldCfgRevision()); return 1; - case 'h': - RTPrintf("VBoxNetDHCP Version %s\n" + case 'M': // --need-main + m->m_fNeedMain = true; + break; + + case 'h': // --help (missed) + RTPrintf("%s Version %sr%u\n" "(C) 2009-" VBOX_C_YEAR " " VBOX_VENDOR "\n" "All rights reserved.\n" "\n" - "Usage: VBoxNetDHCP <options>\n" + "Usage: %s <options>\n" "\n" "Options:\n", - RTBldCfgVersion()); - for (unsigned int i = 0; i < m_vecOptionDefs.size(); i++) - RTPrintf(" -%c, %s\n", m_vecOptionDefs[i]->iShort, m_vecOptionDefs[i]->pszLong); + RTProcShortName(), + RTBldCfgVersion(), + RTBldCfgRevision(), + RTProcShortName()); + for (unsigned int i = 0; i < m->m_vecOptionDefs.size(); i++) + RTPrintf(" -%c, %s\n", m->m_vecOptionDefs[i]->iShort, m->m_vecOptionDefs[i]->pszLong); usage(); /* to print Service Specific usage */ return 1; @@ -203,17 +351,18 @@ int VBoxNetBaseService::parseArgs(int argc, char **argv) return rc; } + int VBoxNetBaseService::tryGoOnline(void) { /* * Open the session, load ring-0 and issue the request. 
*/ - int rc = SUPR3Init(&m_pSession); + int rc = SUPR3Init(&m->m_pSession); if (RT_FAILURE(rc)) { - m_pSession = NIL_RTR0PTR; + m->m_pSession = NIL_RTR0PTR; LogRel(("VBoxNetBaseService: SUPR3Init -> %Rrc\n", rc)); - return 1; + return rc; } char szPath[RTPATH_MAX]; @@ -221,14 +370,14 @@ int VBoxNetBaseService::tryGoOnline(void) if (RT_FAILURE(rc)) { LogRel(("VBoxNetBaseService: RTPathExecDir -> %Rrc\n", rc)); - return 1; + return rc; } rc = SUPR3LoadVMM(strcat(szPath, "/VMMR0.r0")); if (RT_FAILURE(rc)) { LogRel(("VBoxNetBaseService: SUPR3LoadVMM(\"%s\") -> %Rrc\n", szPath, rc)); - return 1; + return rc; } /* @@ -238,15 +387,15 @@ int VBoxNetBaseService::tryGoOnline(void) INTNETOPENREQ OpenReq; OpenReq.Hdr.u32Magic = SUPVMMR0REQHDR_MAGIC; OpenReq.Hdr.cbReq = sizeof(OpenReq); - OpenReq.pSession = m_pSession; - strncpy(OpenReq.szNetwork, m_Network.c_str(), sizeof(OpenReq.szNetwork)); + OpenReq.pSession = m->m_pSession; + strncpy(OpenReq.szNetwork, m->m_Network.c_str(), sizeof(OpenReq.szNetwork)); OpenReq.szNetwork[sizeof(OpenReq.szNetwork) - 1] = '\0'; - strncpy(OpenReq.szTrunk, m_TrunkName.c_str(), sizeof(OpenReq.szTrunk)); + strncpy(OpenReq.szTrunk, m->m_TrunkName.c_str(), sizeof(OpenReq.szTrunk)); OpenReq.szTrunk[sizeof(OpenReq.szTrunk) - 1] = '\0'; - OpenReq.enmTrunkType = m_enmTrunkType; + OpenReq.enmTrunkType = m->m_enmTrunkType; OpenReq.fFlags = 0; /** @todo check this */ - OpenReq.cbSend = m_cbSendBuf; - OpenReq.cbRecv = m_cbRecvBuf; + OpenReq.cbSend = m->m_cbSendBuf; + OpenReq.cbRecv = m->m_cbRecvBuf; OpenReq.hIf = INTNET_HANDLE_INVALID; /* @@ -257,10 +406,10 @@ int VBoxNetBaseService::tryGoOnline(void) if (RT_FAILURE(rc)) { Log2(("VBoxNetBaseService: SUPR3CallVMMR0Ex(,VMMR0_DO_INTNET_OPEN,) failed, rc=%Rrc\n", rc)); - goto bad; + return rc; } - m_hIf = OpenReq.hIf; - Log2(("successfully opened/created \"%s\" - hIf=%#x\n", OpenReq.szNetwork, m_hIf)); + m->m_hIf = OpenReq.hIf; + Log2(("successfully opened/created \"%s\" - hIf=%#x\n", OpenReq.szNetwork, 
m->m_hIf)); /* * Get the ring-3 address of the shared interface buffer. @@ -268,20 +417,20 @@ int VBoxNetBaseService::tryGoOnline(void) INTNETIFGETBUFFERPTRSREQ GetBufferPtrsReq; GetBufferPtrsReq.Hdr.u32Magic = SUPVMMR0REQHDR_MAGIC; GetBufferPtrsReq.Hdr.cbReq = sizeof(GetBufferPtrsReq); - GetBufferPtrsReq.pSession = m_pSession; - GetBufferPtrsReq.hIf = m_hIf; + GetBufferPtrsReq.pSession = m->m_pSession; + GetBufferPtrsReq.hIf = m->m_hIf; GetBufferPtrsReq.pRing3Buf = NULL; GetBufferPtrsReq.pRing0Buf = NIL_RTR0PTR; rc = SUPR3CallVMMR0Ex(NIL_RTR0PTR, NIL_VMCPUID, VMMR0_DO_INTNET_IF_GET_BUFFER_PTRS, 0, &GetBufferPtrsReq.Hdr); if (RT_FAILURE(rc)) { Log2(("VBoxNetBaseService: SUPR3CallVMMR0Ex(,VMMR0_DO_INTNET_IF_GET_BUFFER_PTRS,) failed, rc=%Rrc\n", rc)); - goto bad; + return rc; } pBuf = GetBufferPtrsReq.pRing3Buf; Log2(("pBuf=%p cbBuf=%d cbSend=%d cbRecv=%d\n", pBuf, pBuf->cbBuf, pBuf->cbSend, pBuf->cbRecv)); - m_pIfBuf = pBuf; + m->m_pIfBuf = pBuf; /* * Activate the interface. @@ -289,8 +438,8 @@ int VBoxNetBaseService::tryGoOnline(void) INTNETIFSETACTIVEREQ ActiveReq; ActiveReq.Hdr.u32Magic = SUPVMMR0REQHDR_MAGIC; ActiveReq.Hdr.cbReq = sizeof(ActiveReq); - ActiveReq.pSession = m_pSession; - ActiveReq.hIf = m_hIf; + ActiveReq.pSession = m->m_pSession; + ActiveReq.hIf = m->m_hIf; ActiveReq.fActive = true; rc = SUPR3CallVMMR0Ex(NIL_RTR0PTR, NIL_VMCPUID, VMMR0_DO_INTNET_IF_SET_ACTIVE, 0, &ActiveReq.Hdr); if (RT_SUCCESS(rc)) @@ -299,26 +448,313 @@ int VBoxNetBaseService::tryGoOnline(void) /* bail out */ Log2(("VBoxNetBaseService: SUPR3CallVMMR0Ex(,VMMR0_DO_INTNET_IF_SET_PROMISCUOUS_MODE,) failed, rc=%Rrc\n", rc)); - return 0; - bad: - return 1; + return VINF_SUCCESS; } + void VBoxNetBaseService::shutdown(void) { + syncEnter(); + m->fShutdown = true; + syncLeave(); +} + + +int VBoxNetBaseService::syncEnter() +{ + return RTCritSectEnter(&m->m_csThis); +} + + +int VBoxNetBaseService::syncLeave() +{ + return RTCritSectLeave(&m->m_csThis); +} + + +int 
VBoxNetBaseService::waitForIntNetEvent(int cMillis) +{ + int rc = VINF_SUCCESS; + INTNETIFWAITREQ WaitReq; + LogFlowFunc(("ENTER:cMillis: %d\n", cMillis)); + WaitReq.Hdr.u32Magic = SUPVMMR0REQHDR_MAGIC; + WaitReq.Hdr.cbReq = sizeof(WaitReq); + WaitReq.pSession = m->m_pSession; + WaitReq.hIf = m->m_hIf; + WaitReq.cMillies = cMillis; + + rc = SUPR3CallVMMR0Ex(NIL_RTR0PTR, NIL_VMCPUID, VMMR0_DO_INTNET_IF_WAIT, 0, &WaitReq.Hdr); + LogFlowFuncLeaveRC(rc); + return rc; +} + +/* S/G API */ +int VBoxNetBaseService::sendBufferOnWire(PCINTNETSEG pcSg, int cSg, size_t cbFrame) +{ + PINTNETHDR pHdr = NULL; + uint8_t *pu8Frame = NULL; + + /* Allocate frame */ + int rc = IntNetRingAllocateFrame(&m->m_pIfBuf->Send, cbFrame, &pHdr, (void **)&pu8Frame); + AssertRCReturn(rc, rc); + + /* Now we fill pvFrame with S/G above */ + int offFrame = 0; + for (int idxSg = 0; idxSg < cSg; ++idxSg) + { + memcpy(&pu8Frame[offFrame], pcSg[idxSg].pv, pcSg[idxSg].cb); + offFrame+=pcSg[idxSg].cb; + } + + /* Commit */ + IntNetRingCommitFrameEx(&m->m_pIfBuf->Send, pHdr, cbFrame); + + LogFlowFuncLeaveRC(rc); + return rc; } /** - * Print debug message depending on the m_cVerbosity level. - * - * @param iMinLevel The minimum m_cVerbosity level for this message. - * @param fMsg Whether to dump parts for the current DHCP message. - * @param pszFmt The message format string. - * @param ... Optional arguments. + * forcible ask for send packet on the "wire" */ -inline void VBoxNetBaseService::debugPrint(int32_t iMinLevel, bool fMsg, const char *pszFmt, ...) 
const +void VBoxNetBaseService::flushWire() { - if (iMinLevel <= m_cVerbosity) + int rc = VINF_SUCCESS; + INTNETIFSENDREQ SendReq; + SendReq.Hdr.u32Magic = SUPVMMR0REQHDR_MAGIC; + SendReq.Hdr.cbReq = sizeof(SendReq); + SendReq.pSession = m->m_pSession; + SendReq.hIf = m->m_hIf; + rc = SUPR3CallVMMR0Ex(NIL_RTR0PTR, NIL_VMCPUID, VMMR0_DO_INTNET_IF_SEND, 0, &SendReq.Hdr); + AssertRCReturnVoid(rc); + LogFlowFuncLeave(); + +} + + +int VBoxNetBaseService::hlpUDPBroadcast(unsigned uSrcPort, unsigned uDstPort, + void const *pvData, size_t cbData) const +{ + return VBoxNetUDPBroadcast(m->m_pSession, m->m_hIf, m->m_pIfBuf, + m->m_Ipv4Address, &m->m_MacAddress, uSrcPort, + uDstPort, pvData, cbData); + +} + + +const std::string VBoxNetBaseService::getName() const +{ + return m->m_Name; +} + + +void VBoxNetBaseService::setName(const std::string& aName) +{ + m->m_Name = aName; +} + + +const std::string VBoxNetBaseService::getNetwork() const +{ + return m->m_Network; +} + + +void VBoxNetBaseService::setNetwork(const std::string& aNetwork) +{ + m->m_Network = aNetwork; +} + + +const RTMAC VBoxNetBaseService::getMacAddress() const +{ + return m->m_MacAddress; +} + + +void VBoxNetBaseService::setMacAddress(const RTMAC& aMac) +{ + m->m_MacAddress = aMac; +} + + +const RTNETADDRIPV4 VBoxNetBaseService::getIpv4Address() const +{ + return m->m_Ipv4Address; +} + + +void VBoxNetBaseService::setIpv4Address(const RTNETADDRIPV4& aAddress) +{ + m->m_Ipv4Address = aAddress; +} + + +const RTNETADDRIPV4 VBoxNetBaseService::getIpv4Netmask() const +{ + return m->m_Ipv4Netmask; +} + + +void VBoxNetBaseService::setIpv4Netmask(const RTNETADDRIPV4& aNetmask) +{ + m->m_Ipv4Netmask = aNetmask; +} + + +uint32_t VBoxNetBaseService::getSendBufSize() const +{ + return m->m_cbSendBuf; +} + + +void VBoxNetBaseService::setSendBufSize(uint32_t cbBuf) +{ + m->m_cbSendBuf = cbBuf; +} + + +uint32_t VBoxNetBaseService::getRecvBufSize() const +{ + return m->m_cbRecvBuf; +} + + +void 
VBoxNetBaseService::setRecvBufSize(uint32_t cbBuf) +{ + m->m_cbRecvBuf = cbBuf; +} + + +int32_t VBoxNetBaseService::getVerbosityLevel() const +{ + return m->m_cVerbosity; +} + + +void VBoxNetBaseService::setVerbosityLevel(int32_t aVerbosity) +{ + m->m_cVerbosity = aVerbosity; +} + + +void VBoxNetBaseService::addCommandLineOption(const PRTGETOPTDEF optDef) +{ + m->m_vecOptionDefs.push_back(optDef); +} + + +void VBoxNetBaseService::doReceiveLoop() +{ + int rc; + /* Well we're ready */ + PINTNETRINGBUF pRingBuf = &m->m_pIfBuf->Recv; + + for (;;) + { + /* + * Wait for a packet to become available. + */ + /* 2. waiting for request for */ + rc = waitForIntNetEvent(2000); + if (RT_FAILURE(rc)) + { + if (rc == VERR_TIMEOUT || rc == VERR_INTERRUPTED) + { + /* do we want interrupt anyone ??? */ + continue; + } + LogRel(("VBoxNetNAT: waitForIntNetEvent returned %Rrc\n", rc)); + AssertRCReturnVoid(rc); + } + + /* + * Process the receive buffer. + */ + PCINTNETHDR pHdr; + + while ((pHdr = IntNetRingGetNextFrameToRead(pRingBuf)) != NULL) + { + uint8_t const u8Type = pHdr->u8Type; + size_t cbFrame = pHdr->cbFrame; + switch (u8Type) + { + + case INTNETHDR_TYPE_FRAME: + { + void *pvFrame = IntNetHdrGetFramePtr(pHdr, m->m_pIfBuf); + rc = processFrame(pvFrame, cbFrame); + if (RT_FAILURE(rc) && rc == VERR_IGNORED) + { + /* XXX: UDP + ARP for DHCP */ + VBOXNETUDPHDRS Hdrs; + size_t cb; + void *pv = VBoxNetUDPMatch(m->m_pIfBuf, RTNETIPV4_PORT_BOOTPS, &m->m_MacAddress, + VBOXNETUDP_MATCH_UNICAST | VBOXNETUDP_MATCH_BROADCAST + | VBOXNETUDP_MATCH_CHECKSUM + | (m->m_cVerbosity > 2 ? 
VBOXNETUDP_MATCH_PRINT_STDERR : 0), + &Hdrs, &cb); + if (pv && cb) + processUDP(pv, cb); + else + VBoxNetArpHandleIt(m->m_pSession, m->m_hIf, m->m_pIfBuf, &m->m_MacAddress, m->m_Ipv4Address); + } + } + break; + case INTNETHDR_TYPE_GSO: + { + PCPDMNETWORKGSO pGso = IntNetHdrGetGsoContext(pHdr, m->m_pIfBuf); + rc = processGSO(pGso, cbFrame); + if (RT_FAILURE(rc) && rc == VERR_IGNORED) + break; + } + break; + case INTNETHDR_TYPE_PADDING: + break; + default: + break; + } + IntNetRingSkipFrame(&m->m_pIfBuf->Recv); + + } /* loop */ + } + +} + + +int VBoxNetBaseService::startReceiveThreadAndEnterEventLoop() +{ + AssertMsgReturn(isMainNeeded(), ("It's expected that we need Main"), VERR_INTERNAL_ERROR); + + /* start receiving thread */ + int rc = RTThreadCreate(&m->m_hThrRecv, /* thread handle*/ + &VBoxNetBaseService::Data::recvLoop, /* routine */ + this, /* user data */ + 128 * _1K, /* stack size */ + RTTHREADTYPE_IO, /* type */ + 0, /* flags, @todo: waitable ?*/ + "RECV"); + AssertRCReturn(rc,rc); + + m->m_EventQ = com::NativeEventQueue::getMainEventQueue(); + AssertPtrReturn(m->m_EventQ, VERR_INTERNAL_ERROR); + + while(true) + { + m->m_EventQ->processEventQueue(0); + + if (m->fShutdown) + break; + + m->m_EventQ->processEventQueue(500); + } + + return VINF_SUCCESS; +} + + +void VBoxNetBaseService::debugPrint(int32_t iMinLevel, bool fMsg, const char *pszFmt, ...) const +{ + if (iMinLevel <= m->m_cVerbosity) { va_list va; va_start(va, pszFmt); @@ -332,32 +768,37 @@ inline void VBoxNetBaseService::debugPrint(int32_t iMinLevel, bool fMsg, const c * Print debug message depending on the m_cVerbosity level. * * @param iMinLevel The minimum m_cVerbosity level for this message. - * @param fMsg Whether to dump parts for the current DHCP message. + * @param fMsg Whether to dump parts for the current service message. * @param pszFmt The message format string. * @param va Optional arguments. 
*/ void VBoxNetBaseService::debugPrintV(int iMinLevel, bool fMsg, const char *pszFmt, va_list va) const { - if (iMinLevel <= m_cVerbosity) + if (iMinLevel <= m->m_cVerbosity) { va_list vaCopy; /* This dude is *very* special, thus the copy. */ va_copy(vaCopy, va); - RTStrmPrintf(g_pStdErr, "VBoxNetDHCP: %s: %N\n", iMinLevel >= 2 ? "debug" : "info", pszFmt, &vaCopy); + RTStrmPrintf(g_pStdErr, "%s: %s: %N\n", + RTProcShortName(), + iMinLevel >= 2 ? "debug" : "info", + pszFmt, + &vaCopy); va_end(vaCopy); } } + PRTGETOPTDEF VBoxNetBaseService::getOptionsPtr() { PRTGETOPTDEF pOptArray = NULL; - pOptArray = (PRTGETOPTDEF)RTMemAlloc(sizeof(RTGETOPTDEF) * m_vecOptionDefs.size()); + pOptArray = (PRTGETOPTDEF)RTMemAlloc(sizeof(RTGETOPTDEF) * m->m_vecOptionDefs.size()); if (!pOptArray) return NULL; - for (unsigned int i = 0; i < m_vecOptionDefs.size(); ++i) + for (unsigned int i = 0; i < m->m_vecOptionDefs.size(); ++i) { - PRTGETOPTDEF pOpt = m_vecOptionDefs[i]; - memcpy(&pOptArray[i], m_vecOptionDefs[i], sizeof(RTGETOPTDEF)); + PRTGETOPTDEF pOpt = m->m_vecOptionDefs[i]; + memcpy(&pOptArray[i], m->m_vecOptionDefs[i], sizeof(RTGETOPTDEF)); } return pOptArray; } diff --git a/src/VBox/NetworkServices/NetLib/VBoxNetBaseService.h b/src/VBox/NetworkServices/NetLib/VBoxNetBaseService.h index eab9a515..a551bc88 100644 --- a/src/VBox/NetworkServices/NetLib/VBoxNetBaseService.h +++ b/src/VBox/NetworkServices/NetLib/VBoxNetBaseService.h @@ -4,7 +4,7 @@ */ /* - * Copyright (C) 2009 Oracle Corporation + * Copyright (C) 2009-2011 Oracle Corporation * * This file is part of VirtualBox Open Source Edition (OSE), as * available from http://www.virtualbox.org. 
This file is free software; @@ -17,45 +17,127 @@ #ifndef ___VBoxNetBaseService_h___ #define ___VBoxNetBaseService_h___ -class VBoxNetBaseService + +#include <iprt/critsect.h> + + +class VBoxNetHlpUDPService { public: - VBoxNetBaseService(); +virtual int hlpUDPBroadcast(unsigned uSrcPort, unsigned uDstPort, + void const *pvData, size_t cbData) const = 0; +}; + + +class VBoxNetLockee +{ +public: + virtual int syncEnter() = 0; + virtual int syncLeave() = 0; +}; + + +class VBoxNetALock +{ +public: + VBoxNetALock(VBoxNetLockee *a_lck):m_lck(a_lck) + { + if (m_lck) + m_lck->syncEnter(); + } + + ~VBoxNetALock() + { + if (m_lck) + m_lck->syncLeave(); + } + +private: + VBoxNetLockee *m_lck; +}; + +# ifndef BASE_SERVICES_ONLY +class VBoxNetBaseService: public VBoxNetHlpUDPService, public VBoxNetLockee +{ +public: + VBoxNetBaseService(const std::string& aName, const std::string& aNetworkName); virtual ~VBoxNetBaseService(); int parseArgs(int argc, char **argv); int tryGoOnline(void); void shutdown(void); + int syncEnter(); + int syncLeave(); + int waitForIntNetEvent(int cMillis); + int sendBufferOnWire(PCINTNETSEG pSg, int cSg, size_t cbBuffer); + void flushWire(); + + virtual int hlpUDPBroadcast(unsigned uSrcPort, unsigned uDstPort, + void const *pvData, size_t cbData) const; virtual void usage(void) = 0; - virtual void run(void) = 0; - virtual int init(void); virtual int parseOpt(int rc, const RTGETOPTUNION& getOptVal) = 0; + virtual int processFrame(void *, size_t) = 0; + virtual int processGSO(PCPDMNETWORKGSO, size_t) = 0; + virtual int processUDP(void *, size_t) = 0; - inline void debugPrint( int32_t iMinLevel, bool fMsg, const char *pszFmt, ...) const; - void debugPrintV(int32_t iMinLevel, bool fMsg, const char *pszFmt, va_list va) const; -public: - /** @name The server configuration data members. 
- * @{ */ - std::string m_Name; - std::string m_Network; - std::string m_TrunkName; - INTNETTRUNKTYPE m_enmTrunkType; - RTMAC m_MacAddress; - RTNETADDRIPV4 m_Ipv4Address; - /** @} */ - /** @name The network interface - * @{ */ - PSUPDRVSESSION m_pSession; - uint32_t m_cbSendBuf; - uint32_t m_cbRecvBuf; - INTNETIFHANDLE m_hIf; /**< The handle to the network interface. */ - PINTNETBUF m_pIfBuf; /**< Interface buffer. */ - std::vector<PRTGETOPTDEF> m_vecOptionDefs; - /** @} */ - /** @name Debug stuff - * @{ */ - int32_t m_cVerbosity; -private: + + virtual int init(void); + virtual int run(void); + virtual bool isMainNeeded() const; + +protected: + const std::string getName() const; + void setName(const std::string&); + + const std::string getNetwork() const; + void setNetwork(const std::string&); + + const RTMAC getMacAddress() const; + void setMacAddress(const RTMAC&); + + const RTNETADDRIPV4 getIpv4Address() const; + void setIpv4Address(const RTNETADDRIPV4&); + + const RTNETADDRIPV4 getIpv4Netmask() const; + void setIpv4Netmask(const RTNETADDRIPV4&); + + uint32_t getSendBufSize() const; + void setSendBufSize(uint32_t); + + uint32_t getRecvBufSize() const; + void setRecvBufSize(uint32_t); + + int32_t getVerbosityLevel() const; + void setVerbosityLevel(int32_t); + + void addCommandLineOption(const PRTGETOPTDEF); + + /** + * Print debug message depending on the m_cVerbosity level. + * + * @param iMinLevel The minimum m_cVerbosity level for this message. + * @param fMsg Whether to dump parts for the current DHCP message. + * @param pszFmt The message format string. + * @param ... Optional arguments. + */ + void debugPrint(int32_t iMinLevel, bool fMsg, const char *pszFmt, ...) const; + virtual void debugPrintV(int32_t iMinLevel, bool fMsg, const char *pszFmt, va_list va) const; + + private: + void doReceiveLoop(); + + /** starts receiving thread and enter event polling loop. 
*/ + int startReceiveThreadAndEnterEventLoop(); + + protected: + /* VirtualBox instance */ + ComPtr<IVirtualBox> virtualbox; + + private: + struct Data; + Data *m; + + private: PRTGETOPTDEF getOptionsPtr(); - /** @} */ }; +# endif #endif diff --git a/src/VBox/NetworkServices/NetLib/VBoxNetIntIf.cpp b/src/VBox/NetworkServices/NetLib/VBoxNetIntIf.cpp index 74a3998a..f9f7388f 100644 --- a/src/VBox/NetworkServices/NetLib/VBoxNetIntIf.cpp +++ b/src/VBox/NetworkServices/NetLib/VBoxNetIntIf.cpp @@ -4,7 +4,7 @@ */ /* - * Copyright (C) 2009 Oracle Corporation + * Copyright (C) 2009-2010 Oracle Corporation * * This file is part of VirtualBox Open Source Edition (OSE), as * available from http://www.virtualbox.org. This file is free software; diff --git a/src/VBox/NetworkServices/NetLib/VBoxNetLib.h b/src/VBox/NetworkServices/NetLib/VBoxNetLib.h index e1150bfb..2f7f699d 100644 --- a/src/VBox/NetworkServices/NetLib/VBoxNetLib.h +++ b/src/VBox/NetworkServices/NetLib/VBoxNetLib.h @@ -4,7 +4,7 @@ */ /* - * Copyright (C) 2009 Oracle Corporation + * Copyright (C) 2009-2010 Oracle Corporation * * This file is part of VirtualBox Open Source Edition (OSE), as * available from http://www.virtualbox.org. This file is free software; diff --git a/src/VBox/NetworkServices/NetLib/VBoxNetPortForwardString.cpp b/src/VBox/NetworkServices/NetLib/VBoxNetPortForwardString.cpp new file mode 100644 index 00000000..c5333025 --- /dev/null +++ b/src/VBox/NetworkServices/NetLib/VBoxNetPortForwardString.cpp @@ -0,0 +1,349 @@ +/* $Id: VBoxNetPortForwardString.cpp $ */ +/** @file + * VBoxNetPortForwardString - Routines for managing port-forward strings. + */ + +/* + * Copyright (C) 2006-2013 Oracle Corporation + * + * This file is part of VirtualBox Open Source Edition (OSE), as + * available from http://www.virtualbox.org. 
This file is free software; + * you can redistribute it and/or modify it under the terms of the GNU + * General Public License (GPL) as published by the Free Software + * Foundation, in version 2 as it comes in the "COPYING" file of the + * VirtualBox OSE distribution. VirtualBox OSE is distributed in the + * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind. + */ + +/******************************************************************************* +* Header Files * +*******************************************************************************/ +#ifndef RT_OS_WINDOWS +#include <netinet/in.h> +#else +# include <Winsock2.h> +# include <Ws2ipdef.h> +#endif + +#include <iprt/cdefs.h> +#include <iprt/cidr.h> +#include <iprt/param.h> +#include <iprt/path.h> +#include <iprt/stream.h> +#include <iprt/string.h> +#include <iprt/net.h> +#include <iprt/getopt.h> +#include <iprt/ctype.h> + + +#include <VBox/log.h> + +#include "VBoxPortForwardString.h" + + +#define PF_FIELD_SEPARATOR ':' +#define PF_ADDRESS_FIELD_STARTS '[' +#define PF_ADDRESS_FIELD_ENDS ']' + +#define PF_STR_FIELD_SEPARATOR ":" +#define PF_STR_ADDRESS_FIELD_STARTS "[" +#define PF_STR_ADDRESS_FIELD_ENDS "]" + +static int netPfStrAddressParse(char *pszRaw, int cbRaw, + char *pszAddress, int cbAddress, + bool fEmptyAcceptable) +{ + int idxRaw = 0; + int cbField = 0; + + AssertPtrReturn(pszRaw, -1); + AssertPtrReturn(pszAddress, -1); + AssertReturn(pszRaw[0] == PF_ADDRESS_FIELD_STARTS, -1); + + if (pszRaw[0] == PF_ADDRESS_FIELD_STARTS) + { + /* shift pszRaw to next symbol */ + pszRaw++; + cbRaw--; + + + /* we shouldn't face with ending here */ + AssertReturn(cbRaw > 0, VERR_INVALID_PARAMETER); + + char *pszEndOfAddress = RTStrStr(pszRaw, PF_STR_ADDRESS_FIELD_ENDS); + + /* no pair closing sign */ + AssertPtrReturn(pszEndOfAddress, VERR_INVALID_PARAMETER); + + cbField = pszEndOfAddress - pszRaw; + + /* field should be less then the rest of the string */ + AssertReturn(cbField < cbRaw, 
VERR_INVALID_PARAMETER); + + if (cbField != 0) + RTStrCopy(pszAddress, RT_MIN(cbField + 1, cbAddress), pszRaw); + else if (!fEmptyAcceptable) + return -1; + } + + AssertReturn(pszRaw[cbField] == PF_ADDRESS_FIELD_ENDS, -1); + + return cbField + 2; /* length of the field and closing braces */ +} + + +static int netPfStrPortParse(char *pszRaw, int cbRaw, uint16_t *pu16Port) +{ + char *pszEndOfPort = NULL; + uint16_t u16Port = 0; + int idxRaw = 1; /* we increment pszRaw after checks. */ + int cbRest = 0; + size_t cbPort = 0; + + AssertPtrReturn(pszRaw, -1); + AssertPtrReturn(pu16Port, -1); + AssertReturn(pszRaw[0] == PF_FIELD_SEPARATOR, -1); + + pszRaw++; /* skip line separator */ + cbRaw --; + + pszEndOfPort = RTStrStr(pszRaw, ":"); + if (!pszEndOfPort) + { + cbRest = strlen(pszRaw); + + Assert(cbRaw == cbRest); + + /* XXX: Assumption that if string is too big, it will be reported by + * RTStrToUint16. + */ + if (cbRest > 0) + { + pszEndOfPort = pszRaw + cbRest; + cbPort = cbRest; + } + else + return -1; + } + else + cbPort = pszEndOfPort - pszRaw; + + + idxRaw += cbPort; + + Assert(cbRest || pszRaw[idxRaw - 1] == PF_FIELD_SEPARATOR); /* we are 1 char ahead */ + + char szPort[10]; + RT_ZERO(szPort); + + Assert(idxRaw > 0); + RTStrCopy(szPort, RT_MIN(sizeof(szPort), (size_t)(cbPort) + 1), pszRaw); + + if (!(u16Port = RTStrToUInt16(szPort))) + return -1; + + *pu16Port = u16Port; + + return idxRaw; +} + + +static int netPfStrAddressPortPairParse(char *pszRaw, int cbRaw, + char *pszAddress, int cbAddress, + bool fEmptyAddressAcceptable, + uint16_t *pu16Port) +{ + int idxRaw = 0; + int idxRawTotal = 0; + + AssertPtrReturn(pszRaw, -1); + AssertPtrReturn(pszAddress, -1); + AssertPtrReturn(pu16Port, -2); + + /* XXX: Here we should check 0 - ':' and 1 - '[' */ + Assert( pszRaw[0] == PF_FIELD_SEPARATOR + && pszRaw[1] == PF_ADDRESS_FIELD_STARTS); + + pszRaw++; /* field separator skip */ + cbRaw--; + AssertReturn(cbRaw > 0, VERR_INVALID_PARAMETER); + + idxRaw = 0; + + if 
/**
 * Parses a textual port-forwarding rule into a PORTFORWARDRULE.
 *
 * Layout accepted by the code below:
 *      [name]:<tcp|udp>:<host address>:<host port>:<guest address>:<guest port>
 * The name is optional, i.e. the string may start directly with the field
 * separator.  The host address may be empty, the guest address may not.
 *
 * XXX: Having fIPv6 we might improve address verification by comparing the
 * address length with INET[6]_ADDRLEN.
 *
 * @returns VINF_SUCCESS on success, VERR_INVALID_PARAMETER for a NULL argument
 *          or a malformed rule, VERR_NO_MEMORY if the working copy of the
 *          string cannot be allocated.
 * @param   pcszStrPortForward  The rule string; it is duplicated, never modified.
 * @param   fIPv6               Stored verbatim in pPfr->fPfrIPv6; not used for
 *                              validation here.
 * @param   pPfr                Receives the parsed rule.  It is zeroed on entry
 *                              and left zeroed on every failure.
 */
int netPfStrToPf(const char *pcszStrPortForward, int fIPv6, PPORTFORWARDRULE pPfr)
{
    char     *pszName;
    char     *pszHostAddr;
    char     *pszGuestAddr;
    char     *pszRawBegin  = NULL;
    char     *pszRaw       = NULL;
    char     *pszEndOfName = NULL;
    uint16_t  u16HostPort  = 0;
    uint16_t  u16GuestPort = 0;
    bool      fTcpProto    = false;
    int       proto        = 0;
    int       idxRaw       = 0;
    int       cbToken      = 0;
    int       cbRaw        = 0;

    AssertPtrReturn(pcszStrPortForward, VERR_INVALID_PARAMETER);
    AssertPtrReturn(pPfr, VERR_INVALID_PARAMETER);

    memset(pPfr, 0, sizeof(PORTFORWARDRULE));

    pszHostAddr  = &pPfr->szPfrHostAddr[0];
    pszGuestAddr = &pPfr->szPfrGuestAddr[0];
    pszName      = &pPfr->szPfrName[0];

    cbRaw = (int)strlen(pcszStrPortForward);

    /* Minimal rule ":tcp:[]:0:[]:0" has got length 14; nothing is allocated
     * yet, so returning directly is fine here. */
    AssertReturn(cbRaw > 14, VERR_INVALID_PARAMETER);

    pszRaw = RTStrDup(pcszStrPortForward);
    AssertPtrReturn(pszRaw, VERR_NO_MEMORY);

    /* From here on every exit must pass through RTStrFree(pszRawBegin). */
    pszRawBegin = pszRaw;

    /* name (optional) */
    if (pszRaw[0] != PF_FIELD_SEPARATOR)
    {
        pszEndOfName = RTStrStr(pszRaw + 1, PF_STR_FIELD_SEPARATOR);
        if (!pszEndOfName)
            goto invalid_parameter;

        cbToken = (int)(pszEndOfName - pszRaw); /* don't take : into account */

        /* XXX it's unacceptable to have only name entry in PF */
        if (   cbToken < 0
            || cbToken >= cbRaw
            || (size_t)cbToken >= PF_NAMELEN)
            goto invalid_parameter;

        /* cbToken + 1 bounds the copy to the name itself plus the terminator. */
        RTStrCopy(pszName,
                  RT_MIN((size_t)cbToken + 1, PF_NAMELEN),
                  pszRaw);
        pszRaw += cbToken; /* move to separator */
        cbRaw  -= cbToken; /* keep the remaining length in step with pszRaw */
    }

    if (pszRaw[0] != PF_FIELD_SEPARATOR)
        goto invalid_parameter;

    /* protocol */
    pszRaw++; /* skip separator */
    cbRaw--;

    /* Evaluate the "tcp" match on its own: folding the assignment into the
     * '||' expression would make the flag true for "udp" as well. */
    fTcpProto = RTStrNICmp(pszRaw, "tcp", 3) == 0;
    if (   (fTcpProto || RTStrNICmp(pszRaw, "udp", 3) == 0)
        && pszRaw[3] == PF_FIELD_SEPARATOR)
        proto = fTcpProto ? IPPROTO_TCP : IPPROTO_UDP;
    else
        goto invalid_parameter;

    pszRaw += 3;
    cbRaw  -= 3;

    /* host address and port; an empty host address is acceptable */
    idxRaw = netPfStrAddressPortPairParse(pszRaw, cbRaw,
                                          pszHostAddr, INET6_ADDRSTRLEN,
                                          true, &u16HostPort);
    if (idxRaw < 0)
        goto invalid_parameter; /* frees the working copy, unlike a bare return */

    pszRaw += idxRaw;
    cbRaw  -= idxRaw;

    Assert(pszRaw[0] == PF_FIELD_SEPARATOR);

    /* guest address and port; the guest address is mandatory */
    idxRaw = netPfStrAddressPortPairParse(pszRaw, cbRaw,
                                          pszGuestAddr, INET6_ADDRSTRLEN,
                                          false, &u16GuestPort);
    if (idxRaw < 0)
        goto invalid_parameter;

    if (*pszGuestAddr == '\0')
        goto invalid_parameter; /* guest address should be defined */

    /* fill the rule; name and addresses were written in place above */
    pPfr->fPfrIPv6        = fIPv6;
    pPfr->iPfrProto       = proto;
    pPfr->u16PfrHostPort  = u16HostPort;
    pPfr->u16PfrGuestPort = u16GuestPort;

    Log(("name: %s\n"
         "proto: %d\n"
         "host address: %s\n"
         "host port: %d\n"
         "guest address: %s\n"
         "guest port:%d\n",
         pszName, proto,
         pszHostAddr, u16HostPort,
         pszGuestAddr, u16GuestPort));

    RTStrFree(pszRawBegin);
    return VINF_SUCCESS;

invalid_parameter:
    RTStrFree(pszRawBegin);
    /* Wipe whatever was partially written (name, host address). */
    memset(pPfr, 0, sizeof(PORTFORWARDRULE));
    return VERR_INVALID_PARAMETER;
}
a/src/VBox/NetworkServices/NetLib/VBoxNetUDP.cpp b/src/VBox/NetworkServices/NetLib/VBoxNetUDP.cpp index 79c40c27..508e07d8 100644 --- a/src/VBox/NetworkServices/NetLib/VBoxNetUDP.cpp +++ b/src/VBox/NetworkServices/NetLib/VBoxNetUDP.cpp @@ -4,7 +4,7 @@ */ /* - * Copyright (C) 2009 Oracle Corporation + * Copyright (C) 2009-2010 Oracle Corporation * * This file is part of VirtualBox Open Source Edition (OSE), as * available from http://www.virtualbox.org. This file is free software; @@ -61,14 +61,14 @@ void *VBoxNetUDPMatch(PINTNETBUF pBuf, unsigned uDstPort, PCRTMAC pDstMac, uint3 */ PCINTNETHDR pHdr = IntNetRingGetNextFrameToRead(&pBuf->Recv); if ( !pHdr - || ( pHdr->u16Type != INTNETHDR_TYPE_FRAME - && pHdr->u16Type != INTNETHDR_TYPE_GSO)) + || ( pHdr->u8Type != INTNETHDR_TYPE_FRAME + && pHdr->u8Type != INTNETHDR_TYPE_GSO)) return NULL; size_t cbFrame = pHdr->cbFrame; const void *pvFrame = IntNetHdrGetFramePtr(pHdr, pBuf); PCPDMNETWORKGSO pGso = NULL; - if (pHdr->u16Type == INTNETHDR_TYPE_GSO) + if (pHdr->u8Type == INTNETHDR_TYPE_GSO) { pGso = (PCPDMNETWORKGSO)pvFrame; if (!PDMNetGsoIsValid(pGso, cbFrame, cbFrame - sizeof(*pGso))) diff --git a/src/VBox/NetworkServices/NetLib/VBoxPortForwardString.h b/src/VBox/NetworkServices/NetLib/VBoxPortForwardString.h new file mode 100644 index 00000000..30264526 --- /dev/null +++ b/src/VBox/NetworkServices/NetLib/VBoxPortForwardString.h @@ -0,0 +1,56 @@ +/* $Id: VBoxPortForwardString.h $ */ +/** @file + * VBoxPortForwardString + */ + +/* + * Copyright (C) 2009-2010 Oracle Corporation + * + * This file is part of VirtualBox Open Source Edition (OSE), as + * available from http://www.virtualbox.org. This file is free software; + * you can redistribute it and/or modify it under the terms of the GNU + * General Public License (GPL) as published by the Free Software + * Foundation, in version 2 as it comes in the "COPYING" file of the + * VirtualBox OSE distribution. 
/* Size of PORTFORWARDRULE::szPfrName in bytes.  netPfStrToPf rejects names of
 * PF_NAMELEN characters or more, so the terminator always fits. */
#define PF_NAMELEN 64
/*
 * TBD: Here is shared implementation of parsing port-forward string
 * of format:
 *      name:proto:[ipv4 or ipv6 address]:host-port:[ipv4 or ipv6 guest addr]:guest port
 * where proto is "tcp" or "udp" (compared case-insensitively by netPfStrToPf)
 * and name may be left empty.
 *
 * This code is supposed to be used in NetService and Frontend and perhaps in
 * corresponding services.
 *
 * Note: ports are in host format.
 */

/* One parsed port-forwarding rule, as filled in by netPfStrToPf. */
typedef struct PORTFORWARDRULE
{
    /* rule name; all zero when the rule string carries no name */
    char       szPfrName[PF_NAMELEN];
    /* true if ipv6 and false otherwise; copied from netPfStrToPf's fIPv6 argument */
    int        fPfrIPv6;
    /* IPPROTO_{UDP,TCP} */
    int        iPfrProto;
    /* host address as text; the parser accepts an empty host address */
    char       szPfrHostAddr[INET6_ADDRSTRLEN];
    /* host port */
    uint16_t   u16PfrHostPort;
    /* guest address as text; the parser rejects rules with an empty guest address */
    char       szPfrGuestAddr[INET6_ADDRSTRLEN];
    /* guest port */
    uint16_t   u16PfrGuestPort;
} PORTFORWARDRULE, *PPORTFORWARDRULE;

/*
 * Parses pcszStrPortForward (format described above) into *pPfr.
 *
 * Returns VINF_SUCCESS on success, VERR_INVALID_PARAMETER for NULL arguments
 * or a malformed rule, VERR_NO_MEMORY when the internal copy of the string
 * cannot be allocated.  fIPv6 is only recorded in the rule, not validated.
 */
int netPfStrToPf(const char *pcszStrPortForward,int fIPv6, PPORTFORWARDRULE pPfr);
/**
 * Small reference-counted owning pointer.
 *
 * Every SharedPtr refers to a heap-allocated control block (imp) that holds
 * the managed object and a plain int reference count.  The object is deleted
 * together with the control block when the last SharedPtr referring to it
 * goes away.  The count is not atomic, so instances sharing one control block
 * must not be used from several threads without external locking.
 *
 * @tparam T    Type of the managed object; it is released with 'delete'.
 */
template<typename T>
class SharedPtr
{
    /** Control block: the managed object and the number of owners. */
    struct imp
    {
        imp(T *pTrg = NULL, int cnt = 1): ptr(pTrg), refcnt(cnt) {}
        ~imp() { delete ptr; } /* deleting NULL is a no-op */

        T  *ptr;
        int refcnt;
    };

    /** Drops one reference and destroys the control block with the last one. */
    static void release(imp *pImp)
    {
        if (--pImp->refcnt == 0)
            delete pImp;
    }

    public:
    /**
     * Takes ownership of @a t (may be NULL).  A control block is always
     * allocated, so p is never NULL afterwards.
     */
    SharedPtr(T *t = NULL): p(NULL)
    {
        p = new imp(t);
    }

    ~SharedPtr()
    {
        release(p);
    }

    /** Shares ownership with @a rhs. */
    SharedPtr(const SharedPtr& rhs): p(rhs.p)
    {
        p->refcnt++;
    }

    /**
     * Switches to sharing the object owned by @a rhs.
     *
     * The new control block is acquired BEFORE the old one is released:
     * @a rhs may live inside the object this pointer currently owns (think
     * 'head = head->next'), in which case releasing first would destroy
     * @a rhs, and possibly its control block, before it is read.
     */
    const SharedPtr& operator= (const SharedPtr& rhs)
    {
        if (p == rhs.p)
            return *this;

        imp *pOld = p;
        p = rhs.p;
        p->refcnt++;
        release(pOld);

        return *this;
    }

    /** @returns the managed object, NULL if none. */
    T *get() const
    {
        return p->ptr;
    }

    T *operator->()
    {
        return p->ptr;
    }

    const T*operator->() const
    {
        return p->ptr;
    }

    /** @returns the number of SharedPtr instances sharing the control block. */
    int use_count() const
    {
        return p->refcnt;
    }

    private:
    imp *p;
};
available from http://www.virtualbox.org. This file is free software; + * you can redistribute it and/or modify it under the terms of the GNU + * General Public License (GPL) as published by the Free Software + * Foundation, in version 2 as it comes in the "COPYING" file of the + * VirtualBox OSE distribution. VirtualBox OSE is distributed in the + * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind. + */ + +/******************************************************************************* +* Header Files * +*******************************************************************************/ +#ifndef _NETLIB_UTILS_H_ +#define _NETLIB_UTILS_H_ + +#include "cpp/utils.h" + +typedef ComPtr<IVirtualBox> ComVirtualBoxPtr; +typedef ComPtr<IDHCPServer> ComDhcpServerPtr; +typedef ComPtr<IHost> ComHostPtr; +typedef ComPtr<INATNetwork> ComNatPtr; +typedef com::SafeArray<BSTR> ComBstrArray; + +typedef std::vector<RTNETADDRIPV4> AddressList; +typedef std::map<RTNETADDRIPV4, int> AddressToOffsetMapping; + + +inline bool isDhcpRequired(const ComNatPtr& nat) +{ + BOOL fNeedDhcpServer = false; + if (FAILED(nat->COMGETTER(NeedDhcpServer)(&fNeedDhcpServer))) + return false; + + return fNeedDhcpServer; +} + + +inline int findDhcpServer(const ComVirtualBoxPtr& vbox, const std::string& name, ComDhcpServerPtr& dhcp) +{ + HRESULT hrc = vbox->FindDHCPServerByNetworkName(com::Bstr(name.c_str()).raw(), + dhcp.asOutParam()); + AssertComRCReturn(hrc, VERR_NOT_FOUND); + + return VINF_SUCCESS; +} + + +inline int findNatNetwork(const ComVirtualBoxPtr& vbox, const std::string& name, ComNatPtr& nat) +{ + HRESULT hrc = vbox->FindNATNetworkByName(com::Bstr(name.c_str()).raw(), + nat.asOutParam()); + + AssertComRCReturn(hrc, VERR_NOT_FOUND); + + return VINF_SUCCESS; +} + + +inline RTNETADDRIPV4 networkid(const RTNETADDRIPV4& addr, const RTNETADDRIPV4& netmask) +{ + RTNETADDRIPV4 netid; + netid.u = addr.u & netmask.u; + return netid; +} + + +int localMappings(const ComNatPtr&, 
AddressToOffsetMapping&); +int hostDnsServers(const ComHostPtr&, const RTNETADDRIPV4&,/* const */ AddressToOffsetMapping&, AddressList&); +int hostDnsSearchList(const ComHostPtr&, std::vector<std::string>&); +int hostDnsDomain(const ComHostPtr&, std::string& domainStr); + + +class NATNetworkEventAdapter +{ + public: + virtual HRESULT HandleEvent(VBoxEventType_T aEventType, IEvent *pEvent) = 0; +}; + + +class NATNetworkListener +{ + public: + NATNetworkListener():m_pNAT(NULL){} + + HRESULT init(NATNetworkEventAdapter *pNAT) + { + AssertPtrReturn(pNAT, E_INVALIDARG); + + m_pNAT = pNAT; + return S_OK; + } + + HRESULT init() + { + m_pNAT = NULL; + return S_OK; + } + + void uninit() { m_pNAT = NULL; } + + HRESULT HandleEvent(VBoxEventType_T aEventType, IEvent *pEvent) + { + if (m_pNAT) + return m_pNAT->HandleEvent(aEventType, pEvent); + else + return E_FAIL; + } + + private: + NATNetworkEventAdapter *m_pNAT; +}; +typedef ListenerImpl<NATNetworkListener, NATNetworkEventAdapter*> NATNetworkListenerImpl; + +# if VBOX_WITH_XPCOM +class NS_CLASSINFO_NAME(NATNetworkListenerImpl); +# endif + +typedef ComPtr<NATNetworkListenerImpl> ComNatListenerPtr; +typedef com::SafeArray<VBoxEventType_T> ComEventTypeArray; + +/* XXX: const is commented out because of compilation erro on Windows host, but it's intended that this function + isn't modify event type array */ +int createNatListener(ComNatListenerPtr& listener, const ComVirtualBoxPtr& vboxptr, + NATNetworkEventAdapter *adapter, /* const */ ComEventTypeArray& events); +#endif |