diff options
author | Lorry Tar Creator <lorry-tar-importer@baserock.org> | 2014-03-26 19:21:20 +0000 |
---|---|---|
committer | <> | 2014-05-08 15:03:54 +0000 |
commit | fb123f93f9f5ce42c8e5785d2f8e0edaf951740e (patch) | |
tree | c2103d76aec5f1f10892cd1d3a38e24f665ae5db /src/VBox/Storage/VD.cpp | |
parent | 58ed4748338f9466599adfc8a9171280ed99e23f (diff) | |
download | VirtualBox-master.tar.gz |
Imported from /home/lorry/working-area/delta_VirtualBox/VirtualBox-4.3.10.tar.bz2. HEAD VirtualBox-4.3.10 master
Diffstat (limited to 'src/VBox/Storage/VD.cpp')
-rw-r--r-- | src/VBox/Storage/VD.cpp | 3115 |
1 files changed, 1664 insertions, 1451 deletions
diff --git a/src/VBox/Storage/VD.cpp b/src/VBox/Storage/VD.cpp index a4ae99e5..5632e133 100644 --- a/src/VBox/Storage/VD.cpp +++ b/src/VBox/Storage/VD.cpp @@ -4,7 +4,7 @@ */ /* - * Copyright (C) 2006-2012 Oracle Corporation + * Copyright (C) 2006-2013 Oracle Corporation * * This file is part of VirtualBox Open Source Edition (OSE), as * available from http://www.virtualbox.org. This file is free software; @@ -36,9 +36,9 @@ #include <iprt/param.h> #include <iprt/memcache.h> #include <iprt/sg.h> -#include <iprt/critsect.h> #include <iprt/list.h> #include <iprt/avl.h> +#include <iprt/semaphore.h> #include <VBox/vd-plugin.h> #include <VBox/vd-cache-plugin.h> @@ -97,6 +97,9 @@ typedef struct VDIO bool fIgnoreFlush; } VDIO, *PVDIO; +/** Forward declaration of an I/O task */ +typedef struct VDIOTASK *PVDIOTASK; + /** * VBox HDD Container image descriptor. */ @@ -234,31 +237,41 @@ struct VBOXHDD RTMEMCACHE hMemCacheIoCtx; /** Memory cache for I/O tasks. */ RTMEMCACHE hMemCacheIoTask; - /** Critical section protecting the disk against concurrent access. */ - RTCRITSECT CritSect; - /** Head of queued I/O contexts - LIFO order. */ - volatile PVDIOCTX pIoCtxHead; - /** Flag whether the disk is currently locked by growing write or a flush - * request. Other flush or growing write requests need to wait until - * the current one completes. - */ + /** An I/O context is currently using the disk structures + * Every I/O context must be placed on one of the lists below. */ volatile bool fLocked; - /** List of waiting requests. - Protected by the critical section. */ - RTLISTNODE ListWriteLocked; - /** I/O context which locked the disk. */ - PVDIOCTX pIoCtxLockOwner; + /** Head of pending I/O tasks waiting for completion - LIFO order. */ + volatile PVDIOTASK pIoTasksPendingHead; + /** Head of newly queued I/O contexts - LIFO order. */ + volatile PVDIOCTX pIoCtxHead; + /** Head of halted I/O contexts which are given back to generic + * disk framework by the backend. - LIFO order. 
*/ + volatile PVDIOCTX pIoCtxHaltedHead; + + /** Head of blocked I/O contexts, processed only + * after pIoCtxLockOwner was freed - LIFO order. */ + volatile PVDIOCTX pIoCtxBlockedHead; + /** I/O context which locked the disk for a growing write or flush request. + * Other flush or growing write requests need to wait until + * the current one completes. - NIL_VDIOCTX if unlocked. */ + volatile PVDIOCTX pIoCtxLockOwner; /** Pointer to the L2 disk cache if any. */ PVDCACHE pCache; /** Pointer to the discard state if any. */ PVDDISCARDSTATE pDiscard; + + /** Event semaphore for synchronous I/O. */ + RTSEMEVENT hEventSemSyncIo; + /** Status code of the last synchronous I/O request. */ + int rcSync; }; -# define VD_THREAD_IS_CRITSECT_OWNER(Disk) \ +# define VD_IS_LOCKED(a_pDisk) \ do \ { \ - AssertMsg(RTCritSectIsOwner(&Disk->CritSect), \ - ("Thread does not own critical section\n"));\ + AssertMsg(a_pDisk->fLocked, \ + ("Lock not held\n"));\ } while(0) /** @@ -305,8 +318,8 @@ typedef struct VDIOCTX PVBOXHDD pDisk; /** Return code. */ int rcReq; - /** Flag whether the I/O context is blocked because it is in the growing list. */ - bool fBlocked; + /** Various flags for the I/O context. */ + uint32_t fFlags; /** Number of data transfers currently pending. */ volatile uint32_t cDataTransfersPending; /** How many meta data transfers are pending. */ @@ -341,6 +354,12 @@ typedef struct VDIOCTX PVDIMAGE pImageStart; /** S/G buffer */ RTSGBUF SgBuf; + /** Number of bytes to clear in the buffer before the current read. */ + size_t cbBufClear; + /** Number of images to read. */ + unsigned cImagesRead; + /** Override for the parent image to start reading from. */ + PVDIMAGE pImageParentOverride; } Io; /** Discard requests. */ struct @@ -409,6 +428,31 @@ typedef struct VDIOCTX } Type; } VDIOCTX; +/** Default flags for an I/O context, i.e. unblocked and async. */ +#define VDIOCTX_FLAGS_DEFAULT (0) +/** Flag whether the context is blocked. 
*/ +#define VDIOCTX_FLAGS_BLOCKED RT_BIT_32(0) +/** Flag whether the I/O context is using synchronous I/O. */ +#define VDIOCTX_FLAGS_SYNC RT_BIT_32(1) +/** Flag whether the read should update the cache. */ +#define VDIOCTX_FLAGS_READ_UPDATE_CACHE RT_BIT_32(2) +/** Flag whether free blocks should be zeroed. + * If false and no image has data for specified + * range VERR_VD_BLOCK_FREE is returned for the I/O context. + * Note that unallocated blocks are still zeroed + * if at least one image has valid data for a part + * of the range. + */ +#define VDIOCTX_FLAGS_ZERO_FREE_BLOCKS RT_BIT_32(3) +/** Don't free the I/O context when complete because + * it was allocated elsewhere (stack, ...). */ +#define VDIOCTX_FLAGS_DONT_FREE RT_BIT_32(4) +/* Don't set the modified flag for this I/O context when writing. */ +#define VDIOCTX_FLAGS_DONT_SET_MODIFIED_FLAG RT_BIT_32(5) + +/** NIL I/O context pointer value. */ +#define NIL_VDIOCTX ((PVDIOCTX)0) + /** * List node for deferred I/O contexts. */ @@ -429,12 +473,16 @@ typedef struct VDIOCTXDEFERRED */ typedef struct VDIOTASK { + /** Next I/O task waiting in the list. */ + struct VDIOTASK * volatile pNext; /** Storage this task belongs to. */ PVDIOSTORAGE pIoStorage; /** Optional completion callback. */ PFNVDXFERCOMPLETED pfnComplete; /** Opaque user data. */ void *pvUser; + /** Completion status code for the task. */ + int rcReq; /** Flag whether this is a meta data transfer. */ bool fMeta; /** Type dependent data. */ @@ -455,7 +503,7 @@ typedef struct VDIOTASK PVDMETAXFER pMetaXfer; } Meta; } Type; -} VDIOTASK, *PVDIOTASK; +} VDIOTASK; /** * Storage handle. @@ -549,6 +597,10 @@ static PVDCACHEBACKEND aStaticCacheBackends[] = /** Forward declaration of the async discard helper. 
*/ static int vdDiscardHelperAsync(PVDIOCTX pIoCtx); +static int vdWriteHelperAsync(PVDIOCTX pIoCtx); +static void vdDiskProcessBlockedIoCtx(PVBOXHDD pDisk); +static int vdDiskUnlock(PVBOXHDD pDisk, PVDIOCTX pIoCtxRc); +static DECLCALLBACK(void) vdIoCtxSyncComplete(void *pvUser1, void *pvUser2, int rcReq); /** * internal: add several backends. @@ -765,6 +817,41 @@ static PVDIMAGE vdGetImageByNumber(PVBOXHDD pDisk, unsigned nImage) } /** + * Initialize the structure members of a given I/O context. + */ +DECLINLINE(void) vdIoCtxInit(PVDIOCTX pIoCtx, PVBOXHDD pDisk, VDIOCTXTXDIR enmTxDir, + uint64_t uOffset, size_t cbTransfer, PVDIMAGE pImageStart, + PCRTSGBUF pcSgBuf, void *pvAllocation, + PFNVDIOCTXTRANSFER pfnIoCtxTransfer, uint32_t fFlags) +{ + pIoCtx->pDisk = pDisk; + pIoCtx->enmTxDir = enmTxDir; + pIoCtx->Req.Io.cbTransferLeft = (uint32_t)cbTransfer; Assert((uint32_t)cbTransfer == cbTransfer); + pIoCtx->Req.Io.uOffset = uOffset; + pIoCtx->Req.Io.cbTransfer = cbTransfer; + pIoCtx->Req.Io.pImageStart = pImageStart; + pIoCtx->Req.Io.pImageCur = pImageStart; + pIoCtx->Req.Io.cbBufClear = 0; + pIoCtx->Req.Io.pImageParentOverride = NULL; + pIoCtx->cDataTransfersPending = 0; + pIoCtx->cMetaTransfersPending = 0; + pIoCtx->fComplete = false; + pIoCtx->fFlags = fFlags; + pIoCtx->pvAllocation = pvAllocation; + pIoCtx->pfnIoCtxTransfer = pfnIoCtxTransfer; + pIoCtx->pfnIoCtxTransferNext = NULL; + pIoCtx->rcReq = VINF_SUCCESS; + pIoCtx->pIoCtxParent = NULL; + + /* There is no S/G list for a flush request. */ + if ( enmTxDir != VDIOCTXTXDIR_FLUSH + && enmTxDir != VDIOCTXTXDIR_DISCARD) + RTSgBufClone(&pIoCtx->Req.Io.SgBuf, pcSgBuf); + else + memset(&pIoCtx->Req.Io.SgBuf, 0, sizeof(RTSGBUF)); +} + +/** * Internal: Tries to read the desired range from the given cache. * * @returns VBox status code. @@ -773,8 +860,8 @@ static PVDIMAGE vdGetImageByNumber(PVBOXHDD pDisk, unsigned nImage) * Everything thereafter might be in the cache. * @param pCache The cache to read from. 
* @param uOffset Offset of the virtual disk to read. - * @param pvBuf Where to store the read data. * @param cbRead How much to read. + * @param pIoCtx The I/O context to read into. * @param pcbRead Where to store the number of bytes actually read. * On success this indicates the number of bytes read from the cache. * If VERR_VD_BLOCK_FREE is returned this gives the number of bytes @@ -783,18 +870,18 @@ static PVDIMAGE vdGetImageByNumber(PVBOXHDD pDisk, unsigned nImage) * might or might not be in the cache. */ static int vdCacheReadHelper(PVDCACHE pCache, uint64_t uOffset, - void *pvBuf, size_t cbRead, size_t *pcbRead) + size_t cbRead, PVDIOCTX pIoCtx, size_t *pcbRead) { int rc = VINF_SUCCESS; - LogFlowFunc(("pCache=%#p uOffset=%llu pvBuf=%#p cbRead=%zu pcbRead=%#p\n", - pCache, uOffset, pvBuf, cbRead, pcbRead)); + LogFlowFunc(("pCache=%#p uOffset=%llu pIoCtx=%p cbRead=%zu pcbRead=%#p\n", + pCache, uOffset, pIoCtx, cbRead, pcbRead)); AssertPtr(pCache); AssertPtr(pcbRead); - rc = pCache->Backend->pfnRead(pCache->pBackendData, uOffset, pvBuf, - cbRead, pcbRead); + rc = pCache->Backend->pfnRead(pCache->pBackendData, uOffset, cbRead, + pIoCtx, pcbRead); LogFlowFunc(("returns rc=%Rrc pcbRead=%zu\n", rc, *pcbRead)); return rc; @@ -806,38 +893,38 @@ static int vdCacheReadHelper(PVDCACHE pCache, uint64_t uOffset, * @returns VBox status code. * @param pCache The cache to write to. * @param uOffset Offset of the virtual disk to write to the cache. - * @param pcvBuf The data to write. * @param cbWrite How much to write. + * @param pIoCtx The I/O context to write from. * @param pcbWritten How much data could be written, optional. 
*/ -static int vdCacheWriteHelper(PVDCACHE pCache, uint64_t uOffset, const void *pcvBuf, - size_t cbWrite, size_t *pcbWritten) +static int vdCacheWriteHelper(PVDCACHE pCache, uint64_t uOffset, size_t cbWrite, + PVDIOCTX pIoCtx, size_t *pcbWritten) { int rc = VINF_SUCCESS; - LogFlowFunc(("pCache=%#p uOffset=%llu pvBuf=%#p cbWrite=%zu pcbWritten=%#p\n", - pCache, uOffset, pcvBuf, cbWrite, pcbWritten)); + LogFlowFunc(("pCache=%#p uOffset=%llu pIoCtx=%p cbWrite=%zu pcbWritten=%#p\n", + pCache, uOffset, pIoCtx, cbWrite, pcbWritten)); AssertPtr(pCache); - AssertPtr(pcvBuf); + AssertPtr(pIoCtx); Assert(cbWrite > 0); if (pcbWritten) - rc = pCache->Backend->pfnWrite(pCache->pBackendData, uOffset, pcvBuf, - cbWrite, pcbWritten); + rc = pCache->Backend->pfnWrite(pCache->pBackendData, uOffset, cbWrite, + pIoCtx, pcbWritten); else { size_t cbWritten = 0; do { - rc = pCache->Backend->pfnWrite(pCache->pBackendData, uOffset, pcvBuf, - cbWrite, &cbWritten); + rc = pCache->Backend->pfnWrite(pCache->pBackendData, uOffset, cbWrite, + pIoCtx, &cbWritten); uOffset += cbWritten; - pcvBuf = (char *)pcvBuf + cbWritten; cbWrite -= cbWritten; } while ( cbWrite - && RT_SUCCESS(rc)); + && ( RT_SUCCESS(rc) + || rc == VERR_VD_ASYNC_IO_IN_PROGRESS)); } LogFlowFunc(("returns rc=%Rrc pcbWritten=%zu\n", @@ -846,185 +933,6 @@ static int vdCacheWriteHelper(PVDCACHE pCache, uint64_t uOffset, const void *pcv } /** - * Internal: Reads a given amount of data from the image chain of the disk. - **/ -static int vdDiskReadHelper(PVBOXHDD pDisk, PVDIMAGE pImage, PVDIMAGE pImageParentOverride, - uint64_t uOffset, void *pvBuf, size_t cbRead, size_t *pcbThisRead) -{ - int rc = VINF_SUCCESS; - size_t cbThisRead = cbRead; - - AssertPtr(pcbThisRead); - - *pcbThisRead = 0; - - /* - * Try to read from the given image. - * If the block is not allocated read from override chain if present. 
- */ - rc = pImage->Backend->pfnRead(pImage->pBackendData, - uOffset, pvBuf, cbThisRead, - &cbThisRead); - - if (rc == VERR_VD_BLOCK_FREE) - { - for (PVDIMAGE pCurrImage = pImageParentOverride ? pImageParentOverride : pImage->pPrev; - pCurrImage != NULL && rc == VERR_VD_BLOCK_FREE; - pCurrImage = pCurrImage->pPrev) - { - rc = pCurrImage->Backend->pfnRead(pCurrImage->pBackendData, - uOffset, pvBuf, cbThisRead, - &cbThisRead); - } - } - - if (RT_SUCCESS(rc) || rc == VERR_VD_BLOCK_FREE) - *pcbThisRead = cbThisRead; - - return rc; -} - -/** - * Extended version of vdReadHelper(), implementing certain optimizations - * for image cloning. - * - * @returns VBox status code. - * @param pDisk The disk to read from. - * @param pImage The image to start reading from. - * @param pImageParentOverride The parent image to read from - * if the starting image returns a free block. - * If NULL is passed the real parent of the image - * in the chain is used. - * @param uOffset Offset in the disk to start reading from. - * @param pvBuf Where to store the read data. - * @param cbRead How much to read. - * @param fZeroFreeBlocks Flag whether free blocks should be zeroed. - * If false and no image has data for sepcified - * range VERR_VD_BLOCK_FREE is returned. - * Note that unallocated blocks are still zeroed - * if at least one image has valid data for a part - * of the range. - * @param fUpdateCache Flag whether to update the attached cache if - * available. - * @param cImagesRead Number of images in the chain to read until - * the read is cut off. A value of 0 disables the cut off. - */ -static int vdReadHelperEx(PVBOXHDD pDisk, PVDIMAGE pImage, PVDIMAGE pImageParentOverride, - uint64_t uOffset, void *pvBuf, size_t cbRead, - bool fZeroFreeBlocks, bool fUpdateCache, unsigned cImagesRead) -{ - int rc = VINF_SUCCESS; - size_t cbThisRead; - bool fAllFree = true; - size_t cbBufClear = 0; - - /* Loop until all read. */ - do - { - /* Search for image with allocated block. 
Do not attempt to read more - * than the previous reads marked as valid. Otherwise this would return - * stale data when different block sizes are used for the images. */ - cbThisRead = cbRead; - - if ( pDisk->pCache - && !pImageParentOverride) - { - rc = vdCacheReadHelper(pDisk->pCache, uOffset, pvBuf, - cbThisRead, &cbThisRead); - - if (rc == VERR_VD_BLOCK_FREE) - { - rc = vdDiskReadHelper(pDisk, pImage, NULL, uOffset, pvBuf, cbThisRead, - &cbThisRead); - - /* If the read was successful, write the data back into the cache. */ - if ( RT_SUCCESS(rc) - && fUpdateCache) - { - rc = vdCacheWriteHelper(pDisk->pCache, uOffset, pvBuf, - cbThisRead, NULL); - } - } - } - else - { - /** @todo can be be replaced by vdDiskReadHelper if it proves to be reliable, - * don't want to be responsible for data corruption... - */ - /* - * Try to read from the given image. - * If the block is not allocated read from override chain if present. - */ - rc = pImage->Backend->pfnRead(pImage->pBackendData, - uOffset, pvBuf, cbThisRead, - &cbThisRead); - - if ( rc == VERR_VD_BLOCK_FREE - && cImagesRead != 1) - { - unsigned cImagesToProcess = cImagesRead; - - for (PVDIMAGE pCurrImage = pImageParentOverride ? pImageParentOverride : pImage->pPrev; - pCurrImage != NULL && rc == VERR_VD_BLOCK_FREE; - pCurrImage = pCurrImage->pPrev) - { - rc = pCurrImage->Backend->pfnRead(pCurrImage->pBackendData, - uOffset, pvBuf, cbThisRead, - &cbThisRead); - if (cImagesToProcess == 1) - break; - else if (cImagesToProcess > 0) - cImagesToProcess--; - } - } - } - - /* No image in the chain contains the data for the block. */ - if (rc == VERR_VD_BLOCK_FREE) - { - /* Fill the free space with 0 if we are told to do so - * or a previous read returned valid data. 
*/ - if (fZeroFreeBlocks || !fAllFree) - memset(pvBuf, '\0', cbThisRead); - else - cbBufClear += cbThisRead; - - if (pImage->uOpenFlags & VD_OPEN_FLAGS_INFORM_ABOUT_ZERO_BLOCKS) - rc = VINF_VD_NEW_ZEROED_BLOCK; - else - rc = VINF_SUCCESS; - } - else if (RT_SUCCESS(rc)) - { - /* First not free block, fill the space before with 0. */ - if (!fZeroFreeBlocks) - { - memset((char *)pvBuf - cbBufClear, '\0', cbBufClear); - cbBufClear = 0; - fAllFree = false; - } - } - - cbRead -= cbThisRead; - uOffset += cbThisRead; - pvBuf = (char *)pvBuf + cbThisRead; - } while (cbRead != 0 && RT_SUCCESS(rc)); - - return (!fZeroFreeBlocks && fAllFree) ? VERR_VD_BLOCK_FREE : rc; -} - -/** - * internal: read the specified amount of data in whatever blocks the backend - * will give us. - */ -static int vdReadHelper(PVBOXHDD pDisk, PVDIMAGE pImage, uint64_t uOffset, - void *pvBuf, size_t cbRead, bool fUpdateCache) -{ - return vdReadHelperEx(pDisk, pImage, NULL, uOffset, pvBuf, cbRead, - true /* fZeroFreeBlocks */, fUpdateCache, 0); -} - -/** * Creates a new empty discard state. * * @returns Pointer to the new discard state or NULL if out of memory. 
@@ -1075,7 +983,7 @@ static int vdDiscardRemoveBlocks(PVBOXHDD pDisk, PVDDISCARDSTATE pDiscard, size_ uint32_t idxStart = 0; size_t cbLeft = pBlock->cbDiscard; bool fAllocated = ASMBitTest(pBlock->pbmAllocated, idxStart); - uint32_t cSectors = pBlock->cbDiscard / 512; + uint32_t cSectors = (uint32_t)(pBlock->cbDiscard / 512); while (cbLeft > 0) { @@ -1099,9 +1007,14 @@ static int vdDiscardRemoveBlocks(PVBOXHDD pDisk, PVDDISCARDSTATE pDiscard, size_ if (idxEnd != -1) cbThis = (idxEnd - idxStart) * 512; - rc = pDisk->pLast->Backend->pfnDiscard(pDisk->pLast->pBackendData, offStart, - cbThis, NULL, NULL, &cbThis, - NULL, VD_DISCARD_MARK_UNUSED); + + VDIOCTX IoCtx; + vdIoCtxInit(&IoCtx, pDisk, VDIOCTXTXDIR_DISCARD, 0, 0, NULL, + NULL, NULL, NULL, VDIOCTX_FLAGS_SYNC); + rc = pDisk->pLast->Backend->pfnDiscard(pDisk->pLast->pBackendData, + &IoCtx, offStart, cbThis, NULL, + NULL, &cbThis, NULL, + VD_DISCARD_MARK_UNUSED); if (RT_FAILURE(rc)) break; @@ -1154,167 +1067,6 @@ static int vdDiscardStateDestroy(PVBOXHDD pDisk) } /** - * Discards the given range from the underlying block. - * - * @returns VBox status code. - * @param pDisk VD container data. - * @param offStart Where to start discarding. - * @param cbDiscard How many bytes to discard. - */ -static int vdDiscardRange(PVBOXHDD pDisk, PVDDISCARDSTATE pDiscard, uint64_t offStart, size_t cbDiscard) -{ - int rc = VINF_SUCCESS; - - LogFlowFunc(("pDisk=%#p pDiscard=%#p offStart=%llu cbDiscard=%zu\n", - pDisk, pDiscard, offStart, cbDiscard)); - - do - { - size_t cbThisDiscard; - - /* Look for a matching block in the AVL tree first. */ - PVDDISCARDBLOCK pBlock = (PVDDISCARDBLOCK)RTAvlrU64GetBestFit(pDiscard->pTreeBlocks, offStart, false); - if (!pBlock || pBlock->Core.KeyLast < offStart) - { - void *pbmAllocated = NULL; - size_t cbPreAllocated, cbPostAllocated; - PVDDISCARDBLOCK pBlockAbove = (PVDDISCARDBLOCK)RTAvlrU64GetBestFit(pDiscard->pTreeBlocks, offStart, true); - - /* Clip range to remain in the current block. 
*/ - if (pBlockAbove) - cbThisDiscard = RT_MIN(cbDiscard, pBlockAbove->Core.KeyLast - offStart + 1); - else - cbThisDiscard = cbDiscard; - - Assert(!(cbThisDiscard % 512)); - - /* No block found, try to discard using the backend first. */ - rc = pDisk->pLast->Backend->pfnDiscard(pDisk->pLast->pBackendData, offStart, - cbThisDiscard, &cbPreAllocated, - &cbPostAllocated, &cbThisDiscard, - &pbmAllocated, 0); - if (rc == VERR_VD_DISCARD_ALIGNMENT_NOT_MET) - { - /* Create new discard block. */ - pBlock = (PVDDISCARDBLOCK)RTMemAllocZ(sizeof(VDDISCARDBLOCK)); - if (pBlock) - { - pBlock->Core.Key = offStart - cbPreAllocated; - pBlock->Core.KeyLast = offStart + cbThisDiscard + cbPostAllocated - 1; - pBlock->cbDiscard = cbPreAllocated + cbThisDiscard + cbPostAllocated; - pBlock->pbmAllocated = pbmAllocated; - bool fInserted = RTAvlrU64Insert(pDiscard->pTreeBlocks, &pBlock->Core); - Assert(fInserted); - - RTListPrepend(&pDiscard->ListLru, &pBlock->NodeLru); - pDiscard->cbDiscarding += pBlock->cbDiscard; - if (pDiscard->cbDiscarding > VD_DISCARD_REMOVE_THRESHOLD) - rc = vdDiscardRemoveBlocks(pDisk, pDiscard, VD_DISCARD_REMOVE_THRESHOLD); - else - rc = VINF_SUCCESS; - } - else - { - RTMemFree(pbmAllocated); - rc = VERR_NO_MEMORY; - } - } - } - else - { - /* Range lies partly in the block, update allocation bitmap. */ - int32_t idxStart, idxEnd; - - cbThisDiscard = RT_MIN(cbDiscard, pBlock->Core.KeyLast - offStart + 1); - - AssertPtr(pBlock); - - Assert(!(cbThisDiscard % 512)); - Assert(!((offStart - pBlock->Core.Key) % 512)); - - idxStart = (offStart - pBlock->Core.Key) / 512; - idxEnd = idxStart + (cbThisDiscard / 512); - - ASMBitClearRange(pBlock->pbmAllocated, idxStart, idxEnd); - - /* Call the backend to discard the block if it is completely unallocated now. 
*/ - if (ASMBitFirstSet((volatile void *)pBlock->pbmAllocated, pBlock->cbDiscard / 512) == -1) - { - size_t cbPreAllocated, cbPostAllocated, cbActuallyDiscarded; - - rc = pDisk->pLast->Backend->pfnDiscard(pDisk->pLast->pBackendData, pBlock->Core.Key, - pBlock->cbDiscard, &cbPreAllocated, - &cbPostAllocated, &cbActuallyDiscarded, - NULL, 0); - Assert(rc != VERR_VD_DISCARD_ALIGNMENT_NOT_MET); - Assert(!cbPreAllocated); - Assert(!cbPostAllocated); - Assert(cbActuallyDiscarded == pBlock->cbDiscard || RT_FAILURE(rc)); - - /* Remove the block on success. */ - if (RT_SUCCESS(rc)) - { - PVDDISCARDBLOCK pBlockRemove = (PVDDISCARDBLOCK)RTAvlrU64RangeRemove(pDiscard->pTreeBlocks, pBlock->Core.Key); - Assert(pBlockRemove == pBlock); - - pDiscard->cbDiscarding -= pBlock->cbDiscard; - RTListNodeRemove(&pBlock->NodeLru); - RTMemFree(pBlock->pbmAllocated); - RTMemFree(pBlock); - } - } - else - { - RTListNodeRemove(&pBlock->NodeLru); - RTListPrepend(&pDiscard->ListLru, &pBlock->NodeLru); - rc = VINF_SUCCESS; - } - } - - Assert(cbDiscard >= cbThisDiscard); - - cbDiscard -= cbThisDiscard; - offStart += cbThisDiscard; - } while (cbDiscard != 0 && RT_SUCCESS(rc)); - - LogFlowFunc(("returns rc=%Rrc\n", rc)); - return rc; -} - -/** - * Discard helper. - * - * @returns VBox status code. - * @param pDisk VD container data. - * @param paRanges The array of ranges to discard. - * @param cRanges The number of ranges in the array. - */ -static int vdDiscardHelper(PVBOXHDD pDisk, PCRTRANGE paRanges, unsigned cRanges) -{ - int rc = VINF_SUCCESS; - PVDDISCARDSTATE pDiscard = pDisk->pDiscard; - - if (RT_UNLIKELY(!pDiscard)) - { - pDiscard = vdDiscardStateCreate(); - if (!pDiscard) - return VERR_NO_MEMORY; - - pDisk->pDiscard = pDiscard; - } - - /* Go over the range array and discard individual blocks. 
*/ - for (unsigned i = 0; i < cRanges; i++) - { - rc = vdDiscardRange(pDisk, pDiscard, paRanges[i].offStart, paRanges[i].cbRange); - if (RT_FAILURE(rc)) - break; - } - - return rc; -} - -/** * Marks the given range as allocated in the image. * Required if there are discards in progress and a write to a block which can get discarded * is written to. @@ -1346,7 +1098,7 @@ static int vdDiscardSetRangeAllocated(PVBOXHDD pDisk, uint64_t uOffset, size_t c cbThisRange = RT_MIN(cbThisRange, pBlock->Core.KeyLast - uOffset + 1); idxStart = (uOffset - pBlock->Core.Key) / 512; - idxEnd = idxStart + (cbThisRange / 512); + idxEnd = idxStart + (int32_t)(cbThisRange / 512); ASMBitSetRange(pBlock->pbmAllocated, idxStart, idxEnd); } else @@ -1368,36 +1120,17 @@ static int vdDiscardSetRangeAllocated(PVBOXHDD pDisk, uint64_t uOffset, size_t c DECLINLINE(PVDIOCTX) vdIoCtxAlloc(PVBOXHDD pDisk, VDIOCTXTXDIR enmTxDir, uint64_t uOffset, size_t cbTransfer, - PVDIMAGE pImageStart, - PCRTSGBUF pcSgBuf, void *pvAllocation, - PFNVDIOCTXTRANSFER pfnIoCtxTransfer) + PVDIMAGE pImageStart,PCRTSGBUF pcSgBuf, + void *pvAllocation, PFNVDIOCTXTRANSFER pfnIoCtxTransfer, + uint32_t fFlags) { PVDIOCTX pIoCtx = NULL; pIoCtx = (PVDIOCTX)RTMemCacheAlloc(pDisk->hMemCacheIoCtx); if (RT_LIKELY(pIoCtx)) { - pIoCtx->pDisk = pDisk; - pIoCtx->enmTxDir = enmTxDir; - pIoCtx->Req.Io.cbTransferLeft = cbTransfer; - pIoCtx->Req.Io.uOffset = uOffset; - pIoCtx->Req.Io.cbTransfer = cbTransfer; - pIoCtx->Req.Io.pImageStart = pImageStart; - pIoCtx->Req.Io.pImageCur = pImageStart; - pIoCtx->cDataTransfersPending = 0; - pIoCtx->cMetaTransfersPending = 0; - pIoCtx->fComplete = false; - pIoCtx->fBlocked = false; - pIoCtx->pvAllocation = pvAllocation; - pIoCtx->pfnIoCtxTransfer = pfnIoCtxTransfer; - pIoCtx->pfnIoCtxTransferNext = NULL; - pIoCtx->rcReq = VINF_SUCCESS; - - /* There is no S/G list for a flush request. 
*/ - if (enmTxDir != VDIOCTXTXDIR_FLUSH) - RTSgBufClone(&pIoCtx->Req.Io.SgBuf, pcSgBuf); - else - memset(&pIoCtx->Req.Io.SgBuf, 0, sizeof(RTSGBUF)); + vdIoCtxInit(pIoCtx, pDisk, enmTxDir, uOffset, cbTransfer, pImageStart, + pcSgBuf, pvAllocation, pfnIoCtxTransfer, fFlags); } return pIoCtx; @@ -1409,10 +1142,11 @@ DECLINLINE(PVDIOCTX) vdIoCtxRootAlloc(PVBOXHDD pDisk, VDIOCTXTXDIR enmTxDir, PFNVDASYNCTRANSFERCOMPLETE pfnComplete, void *pvUser1, void *pvUser2, void *pvAllocation, - PFNVDIOCTXTRANSFER pfnIoCtxTransfer) + PFNVDIOCTXTRANSFER pfnIoCtxTransfer, + uint32_t fFlags) { PVDIOCTX pIoCtx = vdIoCtxAlloc(pDisk, enmTxDir, uOffset, cbTransfer, pImageStart, - pcSgBuf, pvAllocation, pfnIoCtxTransfer); + pcSgBuf, pvAllocation, pfnIoCtxTransfer, fFlags); if (RT_LIKELY(pIoCtx)) { @@ -1431,7 +1165,8 @@ DECLINLINE(PVDIOCTX) vdIoCtxDiscardAlloc(PVBOXHDD pDisk, PCRTRANGE paRanges, PFNVDASYNCTRANSFERCOMPLETE pfnComplete, void *pvUser1, void *pvUser2, void *pvAllocation, - PFNVDIOCTXTRANSFER pfnIoCtxTransfer) + PFNVDIOCTXTRANSFER pfnIoCtxTransfer, + uint32_t fFlags) { PVDIOCTX pIoCtx = NULL; @@ -1444,7 +1179,7 @@ DECLINLINE(PVDIOCTX) vdIoCtxDiscardAlloc(PVBOXHDD pDisk, PCRTRANGE paRanges, pIoCtx->cDataTransfersPending = 0; pIoCtx->cMetaTransfersPending = 0; pIoCtx->fComplete = false; - pIoCtx->fBlocked = false; + pIoCtx->fFlags = fFlags; pIoCtx->pvAllocation = pvAllocation; pIoCtx->pfnIoCtxTransfer = pfnIoCtxTransfer; pIoCtx->pfnIoCtxTransferNext = NULL; @@ -1474,7 +1209,7 @@ DECLINLINE(PVDIOCTX) vdIoCtxChildAlloc(PVBOXHDD pDisk, VDIOCTXTXDIR enmTxDir, PFNVDIOCTXTRANSFER pfnIoCtxTransfer) { PVDIOCTX pIoCtx = vdIoCtxAlloc(pDisk, enmTxDir, uOffset, cbTransfer, pImageStart, - pcSgBuf, pvAllocation, pfnIoCtxTransfer); + pcSgBuf, pvAllocation, pfnIoCtxTransfer, pIoCtxParent->fFlags & ~VDIOCTX_FLAGS_DONT_FREE); AssertPtr(pIoCtxParent); Assert(!pIoCtxParent->pIoCtxParent); @@ -1529,17 +1264,24 @@ DECLINLINE(PVDIOTASK) vdIoTaskMetaAlloc(PVDIOSTORAGE pIoStorage, PFNVDXFERCOMPLE 
DECLINLINE(void) vdIoCtxFree(PVBOXHDD pDisk, PVDIOCTX pIoCtx) { - LogFlow(("Freeing I/O context %#p\n", pIoCtx)); - if (pIoCtx->pvAllocation) - RTMemFree(pIoCtx->pvAllocation); + Log(("Freeing I/O context %#p\n", pIoCtx)); + + if (!(pIoCtx->fFlags & VDIOCTX_FLAGS_DONT_FREE)) + { + if (pIoCtx->pvAllocation) + RTMemFree(pIoCtx->pvAllocation); #ifdef DEBUG - memset(pIoCtx, 0xff, sizeof(VDIOCTX)); + memset(&pIoCtx->pDisk, 0xff, sizeof(void *)); #endif - RTMemCacheFree(pDisk->hMemCacheIoCtx, pIoCtx); + RTMemCacheFree(pDisk->hMemCacheIoCtx, pIoCtx); + } } DECLINLINE(void) vdIoTaskFree(PVBOXHDD pDisk, PVDIOTASK pIoTask) { +//#ifdef DEBUG + memset(pIoTask, 0xff, sizeof(VDIOTASK)); +//#endif RTMemCacheFree(pDisk->hMemCacheIoTask, pIoTask); } @@ -1549,7 +1291,8 @@ DECLINLINE(void) vdIoCtxChildReset(PVDIOCTX pIoCtx) RTSgBufReset(&pIoCtx->Req.Io.SgBuf); pIoCtx->Req.Io.uOffset = pIoCtx->Type.Child.uOffsetSaved; - pIoCtx->Req.Io.cbTransferLeft = pIoCtx->Type.Child.cbTransferLeftSaved; + pIoCtx->Req.Io.cbTransferLeft = (uint32_t)pIoCtx->Type.Child.cbTransferLeftSaved; + Assert((uint32_t)pIoCtx->Type.Child.cbTransferLeftSaved == pIoCtx->Type.Child.cbTransferLeftSaved); } DECLINLINE(PVDMETAXFER) vdMetaXferAlloc(PVDIOSTORAGE pIoStorage, uint64_t uOffset, size_t cb) @@ -1569,22 +1312,28 @@ DECLINLINE(PVDMETAXFER) vdMetaXferAlloc(PVDIOSTORAGE pIoStorage, uint64_t uOffse return pMetaXfer; } -DECLINLINE(int) vdIoCtxDefer(PVBOXHDD pDisk, PVDIOCTX pIoCtx) +DECLINLINE(void) vdIoCtxAddToWaitingList(volatile PVDIOCTX *ppList, PVDIOCTX pIoCtx) { - PVDIOCTXDEFERRED pDeferred = (PVDIOCTXDEFERRED)RTMemAllocZ(sizeof(VDIOCTXDEFERRED)); - - if (!pDeferred) - return VERR_NO_MEMORY; + /* Put it on the waiting list. 
*/ + PVDIOCTX pNext = ASMAtomicUoReadPtrT(ppList, PVDIOCTX); + PVDIOCTX pHeadOld; + pIoCtx->pIoCtxNext = pNext; + while (!ASMAtomicCmpXchgExPtr(ppList, pIoCtx, pNext, &pHeadOld)) + { + pNext = pHeadOld; + Assert(pNext != pIoCtx); + pIoCtx->pIoCtxNext = pNext; + ASMNopPause(); + } +} +DECLINLINE(void) vdIoCtxDefer(PVBOXHDD pDisk, PVDIOCTX pIoCtx) +{ LogFlowFunc(("Deferring write pIoCtx=%#p\n", pIoCtx)); - Assert(!pIoCtx->pIoCtxParent && !pIoCtx->fBlocked); - - RTListInit(&pDeferred->NodeDeferred); - pDeferred->pIoCtx = pIoCtx; - RTListAppend(&pDisk->ListWriteLocked, &pDeferred->NodeDeferred); - pIoCtx->fBlocked = true; - return VINF_SUCCESS; + Assert(!pIoCtx->pIoCtxParent && !(pIoCtx->fFlags & VDIOCTX_FLAGS_BLOCKED)); + pIoCtx->fFlags |= VDIOCTX_FLAGS_BLOCKED; + vdIoCtxAddToWaitingList(&pDisk->pIoCtxBlockedHead, pIoCtx); } static size_t vdIoCtxCopy(PVDIOCTX pIoCtxDst, PVDIOCTX pIoCtxSrc, size_t cbData) @@ -1597,15 +1346,14 @@ static int vdIoCtxCmp(PVDIOCTX pIoCtx1, PVDIOCTX pIoCtx2, size_t cbData) return RTSgBufCmp(&pIoCtx1->Req.Io.SgBuf, &pIoCtx2->Req.Io.SgBuf, cbData); } -static size_t vdIoCtxCopyTo(PVDIOCTX pIoCtx, uint8_t *pbData, size_t cbData) +static size_t vdIoCtxCopyTo(PVDIOCTX pIoCtx, const uint8_t *pbData, size_t cbData) { - return RTSgBufCopyToBuf(&pIoCtx->Req.Io.SgBuf, pbData, cbData); + return RTSgBufCopyFromBuf(&pIoCtx->Req.Io.SgBuf, pbData, cbData); } - static size_t vdIoCtxCopyFrom(PVDIOCTX pIoCtx, uint8_t *pbData, size_t cbData) { - return RTSgBufCopyFromBuf(&pIoCtx->Req.Io.SgBuf, pbData, cbData); + return RTSgBufCopyToBuf(&pIoCtx->Req.Io.SgBuf, pbData, cbData); } static size_t vdIoCtxSet(PVDIOCTX pIoCtx, uint8_t ch, size_t cbData) @@ -1614,8 +1362,8 @@ static size_t vdIoCtxSet(PVDIOCTX pIoCtx, uint8_t ch, size_t cbData) } /** - * Process the I/O context, core method which assumes that the critsect is acquired - * by the calling thread. + * Process the I/O context, core method which assumes that the I/O context + * acquired the lock. 
* * @returns VBox status code. * @param pIoCtx I/O context to process. @@ -1624,7 +1372,7 @@ static int vdIoCtxProcessLocked(PVDIOCTX pIoCtx) { int rc = VINF_SUCCESS; - VD_THREAD_IS_CRITSECT_OWNER(pIoCtx->pDisk); + VD_IS_LOCKED(pIoCtx->pDisk); LogFlowFunc(("pIoCtx=%#p\n", pIoCtx)); @@ -1650,7 +1398,7 @@ static int vdIoCtxProcessLocked(PVDIOCTX pIoCtx) /* Don't change anything if there is a metadata transfer pending or we are blocked. */ if ( pIoCtx->cMetaTransfersPending - || pIoCtx->fBlocked) + || (pIoCtx->fFlags & VDIOCTX_FLAGS_BLOCKED)) { rc = VERR_VD_ASYNC_IO_IN_PROGRESS; goto out; @@ -1683,18 +1431,23 @@ static int vdIoCtxProcessLocked(PVDIOCTX pIoCtx) || rc == VERR_VD_NOT_ENOUGH_METADATA || rc == VERR_VD_IOCTX_HALT) rc = VERR_VD_ASYNC_IO_IN_PROGRESS; - else if (RT_FAILURE(rc) && (rc != VERR_VD_ASYNC_IO_IN_PROGRESS)) + else if ( RT_FAILURE(rc) + && (rc != VERR_VD_ASYNC_IO_IN_PROGRESS)) { ASMAtomicCmpXchgS32(&pIoCtx->rcReq, rc, VINF_SUCCESS); - /* - * The I/O context completed if we have an error and there is no data - * or meta data transfer pending. - */ - if ( !pIoCtx->cMetaTransfersPending - && !pIoCtx->cDataTransfersPending) - rc = VINF_VD_ASYNC_IO_FINISHED; - else - rc = VERR_VD_ASYNC_IO_IN_PROGRESS; + + if (rc != VERR_DISK_FULL) + { + /* + * The I/O context completed if we have an error and there is no data + * or meta data transfer pending. + */ + if ( !pIoCtx->cMetaTransfersPending + && !pIoCtx->cDataTransfersPending) + rc = VINF_VD_ASYNC_IO_FINISHED; + else + rc = VERR_VD_ASYNC_IO_IN_PROGRESS; + } } out: @@ -1720,7 +1473,7 @@ static int vdDiskProcessWaitingIoCtx(PVBOXHDD pDisk, PVDIOCTX pIoCtxRc) LogFlowFunc(("pDisk=%#p pIoCtxRc=%#p\n", pDisk, pIoCtxRc)); - VD_THREAD_IS_CRITSECT_OWNER(pDisk); + VD_IS_LOCKED(pDisk); /* Get the waiting list and process it in FIFO order. 
*/ PVDIOCTX pIoCtxHead = ASMAtomicXchgPtrT(&pDisk->pIoCtxHead, NULL, PVDIOCTX); @@ -1746,6 +1499,19 @@ static int vdDiskProcessWaitingIoCtx(PVBOXHDD pDisk, PVDIOCTX pIoCtxRc) pCur = pCur->pIoCtxNext; pTmp->pIoCtxNext = NULL; + /* + * Need to clear the sync flag here if there is a new I/O context + * with it set and the context is not given in pIoCtxRc. + * This happens most likely on a different thread and that one shouldn't + * process the context synchronously. + * + * The thread who issued the context will wait on the event semaphore + * anyway which is signalled when the completion handler is called. + */ + if ( pTmp->fFlags & VDIOCTX_FLAGS_SYNC + && pTmp != pIoCtxRc) + pTmp->fFlags &= ~VDIOCTX_FLAGS_SYNC; + rcTmp = vdIoCtxProcessLocked(pTmp); if (pTmp == pIoCtxRc) { @@ -1764,64 +1530,73 @@ static int vdDiskProcessWaitingIoCtx(PVBOXHDD pDisk, PVDIOCTX pIoCtxRc) } } + /* + * vdIoCtxProcessLocked() never returns VINF_SUCCESS. + * If the status code is still set and a valid I/O context was given + * it was not found on the list (another thread cleared it already). + * Return I/O in progress status code in that case. + */ + if (rc == VINF_SUCCESS && pIoCtxRc) + rc = VERR_VD_ASYNC_IO_IN_PROGRESS; + LogFlowFunc(("returns rc=%Rrc\n", rc)); return rc; } /** - * Leaves the critical section of the disk processing waiting I/O contexts. + * Processes the list of blocked I/O contexts. * - * @returns VBox status code. - * @param pDisk The disk to unlock. - * @param pIoCtxRc An I/O context handle which waits on the list. When processed - * The status code is returned. NULL if there is no I/O context - * to return the status code for. + * @returns nothing. + * @param pDisk The disk structure. 
*/ -static int vdDiskCritSectLeave(PVBOXHDD pDisk, PVDIOCTX pIoCtxRc) +static void vdDiskProcessBlockedIoCtx(PVBOXHDD pDisk) { - int rc = VINF_SUCCESS; + LogFlowFunc(("pDisk=%#p\n", pDisk)); - LogFlowFunc(("pDisk=%#p pIoCtxRc=%#p\n", pDisk, pIoCtxRc)); + VD_IS_LOCKED(pDisk); - VD_THREAD_IS_CRITSECT_OWNER(pDisk); + /* Get the waiting list and process it in FIFO order. */ + PVDIOCTX pIoCtxHead = ASMAtomicXchgPtrT(&pDisk->pIoCtxBlockedHead, NULL, PVDIOCTX); - rc = vdDiskProcessWaitingIoCtx(pDisk, pIoCtxRc); - RTCritSectLeave(&pDisk->CritSect); + /* Reverse it. */ + PVDIOCTX pCur = pIoCtxHead; + pIoCtxHead = NULL; + while (pCur) + { + PVDIOCTX pInsert = pCur; + pCur = pCur->pIoCtxNext; + pInsert->pIoCtxNext = pIoCtxHead; + pIoCtxHead = pInsert; + } - /* - * We have to check for new waiting contexts here. It is possible that - * another thread has queued another one while process waiting contexts - * and because we still held the lock it was appended to the waiting list. - * - * @note Don't overwrite rc here because this might result in loosing - * the status code of the given I/O context. - */ - while (ASMAtomicReadPtrT(&pDisk->pIoCtxHead, PVDIOCTX) != NULL) + /* Process now. */ + pCur = pIoCtxHead; + while (pCur) { - int rc2 = RTCritSectTryEnter(&pDisk->CritSect); + int rc; + PVDIOCTX pTmp = pCur; - if (RT_SUCCESS(rc2)) - { - /* - * Don't pass status codes for any I/O context here. The context must hae been - * in the first run. - */ - vdDiskProcessWaitingIoCtx(pDisk, NULL); - RTCritSectLeave(&pDisk->CritSect); - } - else + pCur = pCur->pIoCtxNext; + pTmp->pIoCtxNext = NULL; + + Assert(!pTmp->pIoCtxParent); + Assert(pTmp->fFlags & VDIOCTX_FLAGS_BLOCKED); + pTmp->fFlags &= ~VDIOCTX_FLAGS_BLOCKED; + + rc = vdIoCtxProcessLocked(pTmp); + if ( rc == VINF_VD_ASYNC_IO_FINISHED + && ASMAtomicCmpXchgBool(&pTmp->fComplete, true, false)) { - /* - * Another thread is holding the lock already and will process the list - * whewn leaving the lock, nothing left to do for us. 
- */ - Assert(rc2 == VERR_SEM_BUSY); - break; + LogFlowFunc(("Waiting I/O context completed pTmp=%#p\n", pTmp)); + vdThreadFinishWrite(pDisk); + pTmp->Type.Root.pfnComplete(pTmp->Type.Root.pvUser1, + pTmp->Type.Root.pvUser2, + pTmp->rcReq); + vdIoCtxFree(pDisk, pTmp); } } - LogFlowFunc(("returns rc=%Rrc\n", rc)); - return rc; + LogFlowFunc(("returns\n")); } /** @@ -1836,31 +1611,20 @@ static int vdIoCtxProcessTryLockDefer(PVDIOCTX pIoCtx) int rc = VINF_SUCCESS; PVBOXHDD pDisk = pIoCtx->pDisk; - LogFlowFunc(("pIoCtx=%#p\n", pIoCtx)); + Log(("Defer pIoCtx=%#p\n", pIoCtx)); /* Put it on the waiting list first. */ - PVDIOCTX pNext = ASMAtomicUoReadPtrT(&pDisk->pIoCtxHead, PVDIOCTX); - PVDIOCTX pHeadOld; - pIoCtx->pIoCtxNext = pNext; - while (!ASMAtomicCmpXchgExPtr(&pDisk->pIoCtxHead, pIoCtx, pNext, &pHeadOld)) - { - pNext = pHeadOld; - Assert(pNext != pIoCtx); - pIoCtx->pIoCtxNext = pNext; - ASMNopPause(); - } + vdIoCtxAddToWaitingList(&pDisk->pIoCtxHead, pIoCtx); - rc = RTCritSectTryEnter(&pDisk->CritSect); - if (RT_SUCCESS(rc)) + if (ASMAtomicCmpXchgBool(&pDisk->fLocked, true, false)) { /* Leave it again, the context will be processed just before leaving the lock. */ - LogFlowFunc(("Successfully acquired the critical section\n")); - rc = vdDiskCritSectLeave(pDisk, pIoCtx); + LogFlowFunc(("Successfully acquired the lock\n")); + rc = vdDiskUnlock(pDisk, pIoCtx); } else { - AssertMsg(rc == VERR_SEM_BUSY, ("Invalid return code %Rrc\n", rc)); - LogFlowFunc(("Critical section is busy\n")); + LogFlowFunc(("Lock is held\n")); rc = VERR_VD_ASYNC_IO_IN_PROGRESS; } @@ -1868,113 +1632,123 @@ static int vdIoCtxProcessTryLockDefer(PVDIOCTX pIoCtx) } /** - * Wrapper for vdIoCtxProcessLocked() which acquires the lock before. + * Process the I/O context in a synchronous manner, waiting + * for it to complete. * - * @returns VBox status code. - * @param pIoCtx I/O context to process. + * @returns VBox status code of the completed request. + * @param pIoCtx The sync I/O context. 
*/ -static int vdIoCtxProcess(PVDIOCTX pIoCtx) +static int vdIoCtxProcessSync(PVDIOCTX pIoCtx) { int rc = VINF_SUCCESS; PVBOXHDD pDisk = pIoCtx->pDisk; - LogFlowFunc(("pIoCtx=%#p\n", pIoCtx)); + LogFlowFunc(("pIoCtx=%p\n", pIoCtx)); + + AssertMsg(pIoCtx->fFlags & VDIOCTX_FLAGS_SYNC, + ("I/O context is not marked as synchronous\n")); + + rc = vdIoCtxProcessTryLockDefer(pIoCtx); + if (rc == VINF_VD_ASYNC_IO_FINISHED) + rc = VINF_SUCCESS; - RTCritSectEnter(&pDisk->CritSect); - rc = vdIoCtxProcessLocked(pIoCtx); - vdDiskCritSectLeave(pDisk, NULL); + if (rc == VERR_VD_ASYNC_IO_IN_PROGRESS) + { + rc = RTSemEventWait(pDisk->hEventSemSyncIo, RT_INDEFINITE_WAIT); + AssertRC(rc); + + rc = pDisk->rcSync; + } + else /* Success or error. */ + { + rc = pIoCtx->rcReq; + vdIoCtxFree(pDisk, pIoCtx); + } return rc; } DECLINLINE(bool) vdIoCtxIsDiskLockOwner(PVBOXHDD pDisk, PVDIOCTX pIoCtx) { - return pDisk->fLocked - && pDisk->pIoCtxLockOwner == pIoCtx; + return pDisk->pIoCtxLockOwner == pIoCtx; } static int vdIoCtxLockDisk(PVBOXHDD pDisk, PVDIOCTX pIoCtx) { int rc = VINF_SUCCESS; + VD_IS_LOCKED(pDisk); + LogFlowFunc(("pDisk=%#p pIoCtx=%#p\n", pDisk, pIoCtx)); - if (!ASMAtomicCmpXchgBool(&pDisk->fLocked, true, false)) + if (!ASMAtomicCmpXchgPtr(&pDisk->pIoCtxLockOwner, pIoCtx, NIL_VDIOCTX)) { Assert(pDisk->pIoCtxLockOwner != pIoCtx); /* No nesting allowed. 
*/ - - rc = vdIoCtxDefer(pDisk, pIoCtx); - if (RT_SUCCESS(rc)) - rc = VERR_VD_ASYNC_IO_IN_PROGRESS; - } - else - { - Assert(!pDisk->pIoCtxLockOwner); - pDisk->pIoCtxLockOwner = pIoCtx; + vdIoCtxDefer(pDisk, pIoCtx); + rc = VERR_VD_ASYNC_IO_IN_PROGRESS; } LogFlowFunc(("returns -> %Rrc\n", rc)); return rc; } -static void vdIoCtxUnlockDisk(PVBOXHDD pDisk, PVDIOCTX pIoCtx, bool fProcessDeferredReqs) +static void vdIoCtxUnlockDisk(PVBOXHDD pDisk, PVDIOCTX pIoCtx, bool fProcessBlockedReqs) { - LogFlowFunc(("pDisk=%#p pIoCtx=%#p fProcessDeferredReqs=%RTbool\n", - pDisk, pIoCtx, fProcessDeferredReqs)); + LogFlowFunc(("pDisk=%#p pIoCtx=%#p fProcessBlockedReqs=%RTbool\n", + pDisk, pIoCtx, fProcessBlockedReqs)); + + VD_IS_LOCKED(pDisk); LogFlow(("Unlocking disk lock owner is %#p\n", pDisk->pIoCtxLockOwner)); - Assert(pDisk->fLocked); Assert(pDisk->pIoCtxLockOwner == pIoCtx); - pDisk->pIoCtxLockOwner = NULL; - ASMAtomicXchgBool(&pDisk->fLocked, false); + ASMAtomicXchgPtrT(&pDisk->pIoCtxLockOwner, NIL_VDIOCTX, PVDIOCTX); - if (fProcessDeferredReqs) + if (fProcessBlockedReqs) { - /* Process any pending writes if the current request didn't caused another growing. */ - RTCritSectEnter(&pDisk->CritSect); - - if (!RTListIsEmpty(&pDisk->ListWriteLocked)) - { - RTLISTNODE ListTmp; - - RTListMove(&ListTmp, &pDisk->ListWriteLocked); - vdDiskCritSectLeave(pDisk, NULL); + /* Process any blocked writes if the current request didn't caused another growing. */ + vdDiskProcessBlockedIoCtx(pDisk); + } - /* Process the list. */ - do - { - int rc; - PVDIOCTXDEFERRED pDeferred = RTListGetFirst(&ListTmp, VDIOCTXDEFERRED, NodeDeferred); - PVDIOCTX pIoCtxWait = pDeferred->pIoCtx; + LogFlowFunc(("returns\n")); +} - AssertPtr(pIoCtxWait); +/** + * Internal: Reads a given amount of data from the image chain of the disk. 
+ **/ +static int vdDiskReadHelper(PVBOXHDD pDisk, PVDIMAGE pImage, PVDIMAGE pImageParentOverride, + uint64_t uOffset, size_t cbRead, PVDIOCTX pIoCtx, size_t *pcbThisRead) +{ + int rc = VINF_SUCCESS; + size_t cbThisRead = cbRead; - RTListNodeRemove(&pDeferred->NodeDeferred); - RTMemFree(pDeferred); + AssertPtr(pcbThisRead); - Assert(!pIoCtxWait->pIoCtxParent); + *pcbThisRead = 0; - pIoCtxWait->fBlocked = false; - LogFlowFunc(("Processing waiting I/O context pIoCtxWait=%#p\n", pIoCtxWait)); + /* + * Try to read from the given image. + * If the block is not allocated read from override chain if present. + */ + rc = pImage->Backend->pfnRead(pImage->pBackendData, + uOffset, cbThisRead, pIoCtx, + &cbThisRead); - rc = vdIoCtxProcess(pIoCtxWait); - if ( rc == VINF_VD_ASYNC_IO_FINISHED - && ASMAtomicCmpXchgBool(&pIoCtxWait->fComplete, true, false)) - { - LogFlowFunc(("Waiting I/O context completed pIoCtxWait=%#p\n", pIoCtxWait)); - vdThreadFinishWrite(pDisk); - pIoCtxWait->Type.Root.pfnComplete(pIoCtxWait->Type.Root.pvUser1, - pIoCtxWait->Type.Root.pvUser2, - pIoCtxWait->rcReq); - vdIoCtxFree(pDisk, pIoCtxWait); - } - } while (!RTListIsEmpty(&ListTmp)); + if (rc == VERR_VD_BLOCK_FREE) + { + for (PVDIMAGE pCurrImage = pImageParentOverride ? 
pImageParentOverride : pImage->pPrev; + pCurrImage != NULL && rc == VERR_VD_BLOCK_FREE; + pCurrImage = pCurrImage->pPrev) + { + rc = pCurrImage->Backend->pfnRead(pCurrImage->pBackendData, + uOffset, cbThisRead, pIoCtx, + &cbThisRead); } - else - vdDiskCritSectLeave(pDisk, NULL); } - LogFlowFunc(("returns\n")); + if (RT_SUCCESS(rc) || rc == VERR_VD_BLOCK_FREE) + *pcbThisRead = cbThisRead; + + return rc; } /** @@ -1984,9 +1758,12 @@ static void vdIoCtxUnlockDisk(PVBOXHDD pDisk, PVDIOCTX pIoCtx, bool fProcessDefe static int vdReadHelperAsync(PVDIOCTX pIoCtx) { int rc; - size_t cbToRead = pIoCtx->Req.Io.cbTransfer; - uint64_t uOffset = pIoCtx->Req.Io.uOffset; - PVDIMAGE pCurrImage = pIoCtx->Req.Io.pImageCur;; + PVBOXHDD pDisk = pIoCtx->pDisk; + size_t cbToRead = pIoCtx->Req.Io.cbTransfer; + uint64_t uOffset = pIoCtx->Req.Io.uOffset; + PVDIMAGE pCurrImage = pIoCtx->Req.Io.pImageCur; + PVDIMAGE pImageParentOverride = pIoCtx->Req.Io.pImageParentOverride; + unsigned cImagesRead = pIoCtx->Req.Io.cImagesRead; size_t cbThisRead; /* Loop until all reads started or we have a backend which needs to read metadata. */ @@ -1997,23 +1774,57 @@ static int vdReadHelperAsync(PVDIOCTX pIoCtx) * stale data when different block sizes are used for the images. */ cbThisRead = cbToRead; - /* - * Try to read from the given image. - * If the block is not allocated read from override chain if present. - */ - rc = pCurrImage->Backend->pfnAsyncRead(pCurrImage->pBackendData, - uOffset, cbThisRead, - pIoCtx, &cbThisRead); + if ( pDisk->pCache + && !pImageParentOverride) + { + rc = vdCacheReadHelper(pDisk->pCache, uOffset, cbThisRead, + pIoCtx, &cbThisRead); + if (rc == VERR_VD_BLOCK_FREE) + { + rc = vdDiskReadHelper(pDisk, pCurrImage, NULL, uOffset, cbThisRead, + pIoCtx, &cbThisRead); - if (rc == VERR_VD_BLOCK_FREE) + /* If the read was successful, write the data back into the cache. 
*/ + if ( RT_SUCCESS(rc) + && pIoCtx->fFlags & VDIOCTX_FLAGS_READ_UPDATE_CACHE) + { + rc = vdCacheWriteHelper(pDisk->pCache, uOffset, cbThisRead, + pIoCtx, NULL); + } + } + } + else { - while ( pCurrImage->pPrev != NULL - && rc == VERR_VD_BLOCK_FREE) + + /* + * Try to read from the given image. + * If the block is not allocated read from override chain if present. + */ + rc = pCurrImage->Backend->pfnRead(pCurrImage->pBackendData, + uOffset, cbThisRead, pIoCtx, + &cbThisRead); + + if ( rc == VERR_VD_BLOCK_FREE + && cImagesRead != 1) { - pCurrImage = pCurrImage->pPrev; - rc = pCurrImage->Backend->pfnAsyncRead(pCurrImage->pBackendData, - uOffset, cbThisRead, - pIoCtx, &cbThisRead); + unsigned cImagesToProcess = cImagesRead; + + pCurrImage = pImageParentOverride ? pImageParentOverride : pCurrImage->pPrev; + pIoCtx->Req.Io.pImageParentOverride = NULL; + + while (pCurrImage && rc == VERR_VD_BLOCK_FREE) + { + rc = pCurrImage->Backend->pfnRead(pCurrImage->pBackendData, + uOffset, cbThisRead, + pIoCtx, &cbThisRead); + if (cImagesToProcess == 1) + break; + else if (cImagesToProcess > 0) + cImagesToProcess--; + + if (rc == VERR_VD_BLOCK_FREE) + pCurrImage = pCurrImage->pPrev; + } } } @@ -2021,17 +1832,41 @@ static int vdReadHelperAsync(PVDIOCTX pIoCtx) if (rc == VERR_VD_BLOCK_FREE) { /* No image in the chain contains the data for the block. */ - vdIoCtxSet(pIoCtx, '\0', cbThisRead); - ASMAtomicSubU32(&pIoCtx->Req.Io.cbTransferLeft, cbThisRead); - rc = VINF_SUCCESS; + ASMAtomicSubU32(&pIoCtx->Req.Io.cbTransferLeft, (uint32_t)cbThisRead); Assert(cbThisRead == (uint32_t)cbThisRead); + + /* Fill the free space with 0 if we are told to do so + * or a previous read returned valid data. 
*/ + if (pIoCtx->fFlags & VDIOCTX_FLAGS_ZERO_FREE_BLOCKS) + vdIoCtxSet(pIoCtx, '\0', cbThisRead); + else + pIoCtx->Req.Io.cbBufClear += cbThisRead; + + if (pIoCtx->Req.Io.pImageCur->uOpenFlags & VD_OPEN_FLAGS_INFORM_ABOUT_ZERO_BLOCKS) + rc = VINF_VD_NEW_ZEROED_BLOCK; + else + rc = VINF_SUCCESS; } - else if (rc == VERR_VD_ASYNC_IO_IN_PROGRESS) - rc = VINF_SUCCESS; else if (rc == VERR_VD_IOCTX_HALT) { uOffset += cbThisRead; cbToRead -= cbThisRead; - pIoCtx->fBlocked = true; + pIoCtx->fFlags |= VDIOCTX_FLAGS_BLOCKED; + } + else if ( RT_SUCCESS(rc) + || rc == VERR_VD_ASYNC_IO_IN_PROGRESS) + { + /* First not free block, fill the space before with 0. */ + if ( pIoCtx->Req.Io.cbBufClear + && !(pIoCtx->fFlags & VDIOCTX_FLAGS_ZERO_FREE_BLOCKS)) + { + RTSGBUF SgBuf; + RTSgBufClone(&SgBuf, &pIoCtx->Req.Io.SgBuf); + RTSgBufReset(&SgBuf); + RTSgBufSet(&SgBuf, 0, pIoCtx->Req.Io.cbBufClear); + pIoCtx->Req.Io.cbBufClear = 0; + pIoCtx->fFlags |= VDIOCTX_FLAGS_ZERO_FREE_BLOCKS; + } + rc = VINF_SUCCESS; } if (RT_FAILURE(rc)) @@ -2051,7 +1886,9 @@ static int vdReadHelperAsync(PVDIOCTX pIoCtx) pIoCtx->Req.Io.pImageCur = pCurrImage ? pCurrImage : pIoCtx->Req.Io.pImageStart; } - return rc; + return (!(pIoCtx->fFlags & VDIOCTX_FLAGS_ZERO_FREE_BLOCKS)) + ? VERR_VD_BLOCK_FREE + : rc; } /** @@ -2061,8 +1898,93 @@ static int vdParentRead(void *pvUser, uint64_t uOffset, void *pvBuf, size_t cbRead) { PVDPARENTSTATEDESC pParentState = (PVDPARENTSTATEDESC)pvUser; - return vdReadHelper(pParentState->pDisk, pParentState->pImage, uOffset, - pvBuf, cbRead, false /* fUpdateCache */); + + /** @todo + * Only used for compaction so far which is not possible to mix with async I/O. + * Needs to be changed if we want to support online compaction of images. + */ + bool fLocked = ASMAtomicXchgBool(&pParentState->pDisk->fLocked, true); + AssertMsgReturn(!fLocked, + ("Calling synchronous parent read while another thread holds the disk lock\n"), + VERR_VD_INVALID_STATE); + + /* Fake an I/O context. 
*/ + RTSGSEG Segment; + RTSGBUF SgBuf; + VDIOCTX IoCtx; + + Segment.pvSeg = pvBuf; + Segment.cbSeg = cbRead; + RTSgBufInit(&SgBuf, &Segment, 1); + vdIoCtxInit(&IoCtx, pParentState->pDisk, VDIOCTXTXDIR_READ, uOffset, cbRead, pParentState->pImage, + &SgBuf, NULL, NULL, VDIOCTX_FLAGS_SYNC | VDIOCTX_FLAGS_ZERO_FREE_BLOCKS); + int rc = vdReadHelperAsync(&IoCtx); + ASMAtomicXchgBool(&pParentState->pDisk->fLocked, false); + return rc; +} + +/** + * Extended version of vdReadHelper(), implementing certain optimizations + * for image cloning. + * + * @returns VBox status code. + * @param pDisk The disk to read from. + * @param pImage The image to start reading from. + * @param pImageParentOverride The parent image to read from + * if the starting image returns a free block. + * If NULL is passed the real parent of the image + * in the chain is used. + * @param uOffset Offset in the disk to start reading from. + * @param pvBuf Where to store the read data. + * @param cbRead How much to read. + * @param fZeroFreeBlocks Flag whether free blocks should be zeroed. + * If false and no image has data for sepcified + * range VERR_VD_BLOCK_FREE is returned. + * Note that unallocated blocks are still zeroed + * if at least one image has valid data for a part + * of the range. + * @param fUpdateCache Flag whether to update the attached cache if + * available. + * @param cImagesRead Number of images in the chain to read until + * the read is cut off. A value of 0 disables the cut off. 
+ */ +static int vdReadHelperEx(PVBOXHDD pDisk, PVDIMAGE pImage, PVDIMAGE pImageParentOverride, + uint64_t uOffset, void *pvBuf, size_t cbRead, + bool fZeroFreeBlocks, bool fUpdateCache, unsigned cImagesRead) +{ + uint32_t fFlags = VDIOCTX_FLAGS_SYNC | VDIOCTX_FLAGS_DONT_FREE; + RTSGSEG Segment; + RTSGBUF SgBuf; + VDIOCTX IoCtx; + + if (fZeroFreeBlocks) + fFlags |= VDIOCTX_FLAGS_ZERO_FREE_BLOCKS; + if (fUpdateCache) + fFlags |= VDIOCTX_FLAGS_READ_UPDATE_CACHE; + + Segment.pvSeg = pvBuf; + Segment.cbSeg = cbRead; + RTSgBufInit(&SgBuf, &Segment, 1); + vdIoCtxInit(&IoCtx, pDisk, VDIOCTXTXDIR_READ, uOffset, cbRead, pImage, &SgBuf, + NULL, vdReadHelperAsync, fFlags); + + IoCtx.Req.Io.pImageParentOverride = pImageParentOverride; + IoCtx.Req.Io.cImagesRead = cImagesRead; + IoCtx.Type.Root.pfnComplete = vdIoCtxSyncComplete; + IoCtx.Type.Root.pvUser1 = pDisk; + IoCtx.Type.Root.pvUser2 = NULL; + return vdIoCtxProcessSync(&IoCtx); +} + +/** + * internal: read the specified amount of data in whatever blocks the backend + * will give us. + */ +static int vdReadHelper(PVBOXHDD pDisk, PVDIMAGE pImage, uint64_t uOffset, + void *pvBuf, size_t cbRead, bool fUpdateCache) +{ + return vdReadHelperEx(pDisk, pImage, NULL, uOffset, pvBuf, cbRead, + true /* fZeroFreeBlocks */, fUpdateCache, 0); } /** @@ -2104,175 +2026,13 @@ static void vdSetModifiedFlag(PVBOXHDD pDisk) vdResetModifiedFlag(pDisk); if (!(pDisk->uModified & VD_IMAGE_MODIFIED_DISABLE_UUID_UPDATE)) - pDisk->pLast->Backend->pfnFlush(pDisk->pLast->pBackendData); - } -} - -/** - * internal: write a complete block (only used for diff images), taking the - * remaining data from parent images. This implementation does not optimize - * anything (except that it tries to read only that portions from parent - * images that are really needed). 
- */ -static int vdWriteHelperStandard(PVBOXHDD pDisk, PVDIMAGE pImage, - PVDIMAGE pImageParentOverride, - uint64_t uOffset, size_t cbWrite, - size_t cbThisWrite, size_t cbPreRead, - size_t cbPostRead, const void *pvBuf, - void *pvTmp) -{ - int rc = VINF_SUCCESS; - - /* Read the data that goes before the write to fill the block. */ - if (cbPreRead) - { - /* - * Updating the cache doesn't make sense here because - * this will be done after the complete block was written. - */ - rc = vdReadHelperEx(pDisk, pImage, pImageParentOverride, - uOffset - cbPreRead, pvTmp, cbPreRead, - true /* fZeroFreeBlocks*/, - false /* fUpdateCache */, 0); - if (RT_FAILURE(rc)) - return rc; - } - - /* Copy the data to the right place in the buffer. */ - memcpy((char *)pvTmp + cbPreRead, pvBuf, cbThisWrite); - - /* Read the data that goes after the write to fill the block. */ - if (cbPostRead) - { - /* If we have data to be written, use that instead of reading - * data from the image. */ - size_t cbWriteCopy; - if (cbWrite > cbThisWrite) - cbWriteCopy = RT_MIN(cbWrite - cbThisWrite, cbPostRead); - else - cbWriteCopy = 0; - /* Figure out how much we cannot read from the image, because - * the last block to write might exceed the nominal size of the - * image for technical reasons. */ - size_t cbFill; - if (uOffset + cbThisWrite + cbPostRead > pDisk->cbSize) - cbFill = uOffset + cbThisWrite + cbPostRead - pDisk->cbSize; - else - cbFill = 0; - /* The rest must be read from the image. */ - size_t cbReadImage = cbPostRead - cbWriteCopy - cbFill; - - /* Now assemble the remaining data. 
*/ - if (cbWriteCopy) - memcpy((char *)pvTmp + cbPreRead + cbThisWrite, - (char *)pvBuf + cbThisWrite, cbWriteCopy); - if (cbReadImage) - rc = vdReadHelperEx(pDisk, pImage, pImageParentOverride, - uOffset + cbThisWrite + cbWriteCopy, - (char *)pvTmp + cbPreRead + cbThisWrite + cbWriteCopy, - cbReadImage, true /* fZeroFreeBlocks */, - false /* fUpdateCache */, 0); - if (RT_FAILURE(rc)) - return rc; - /* Zero out the remainder of this block. Will never be visible, as this - * is beyond the limit of the image. */ - if (cbFill) - memset((char *)pvTmp + cbPreRead + cbThisWrite + cbWriteCopy + cbReadImage, - '\0', cbFill); - } - - /* Write the full block to the virtual disk. */ - rc = pImage->Backend->pfnWrite(pImage->pBackendData, - uOffset - cbPreRead, pvTmp, - cbPreRead + cbThisWrite + cbPostRead, - NULL, &cbPreRead, &cbPostRead, 0); - Assert(rc != VERR_VD_BLOCK_FREE); - Assert(cbPreRead == 0); - Assert(cbPostRead == 0); - - return rc; -} - -/** - * internal: write a complete block (only used for diff images), taking the - * remaining data from parent images. This implementation optimizes out writes - * that do not change the data relative to the state as of the parent images. - * All backends which support differential/growing images support this. - */ -static int vdWriteHelperOptimized(PVBOXHDD pDisk, PVDIMAGE pImage, - PVDIMAGE pImageParentOverride, - uint64_t uOffset, size_t cbWrite, - size_t cbThisWrite, size_t cbPreRead, - size_t cbPostRead, const void *pvBuf, - void *pvTmp, unsigned cImagesRead) -{ - size_t cbFill = 0; - size_t cbWriteCopy = 0; - size_t cbReadImage = 0; - int rc; - - if (cbPostRead) - { - /* Figure out how much we cannot read from the image, because - * the last block to write might exceed the nominal size of the - * image for technical reasons. 
*/ - if (uOffset + cbThisWrite + cbPostRead > pDisk->cbSize) - cbFill = uOffset + cbThisWrite + cbPostRead - pDisk->cbSize; - - /* If we have data to be written, use that instead of reading - * data from the image. */ - if (cbWrite > cbThisWrite) - cbWriteCopy = RT_MIN(cbWrite - cbThisWrite, cbPostRead); - - /* The rest must be read from the image. */ - cbReadImage = cbPostRead - cbWriteCopy - cbFill; - } - - /* Read the entire data of the block so that we can compare whether it will - * be modified by the write or not. */ - rc = vdReadHelperEx(pDisk, pImage, pImageParentOverride, uOffset - cbPreRead, pvTmp, - cbPreRead + cbThisWrite + cbPostRead - cbFill, - true /* fZeroFreeBlocks */, false /* fUpdateCache */, - cImagesRead); - if (RT_FAILURE(rc)) - return rc; - - /* Check if the write would modify anything in this block. */ - if ( !memcmp((char *)pvTmp + cbPreRead, pvBuf, cbThisWrite) - && (!cbWriteCopy || !memcmp((char *)pvTmp + cbPreRead + cbThisWrite, - (char *)pvBuf + cbThisWrite, cbWriteCopy))) - { - /* Block is completely unchanged, so no need to write anything. */ - return VINF_SUCCESS; - } - - /* Copy the data to the right place in the buffer. */ - memcpy((char *)pvTmp + cbPreRead, pvBuf, cbThisWrite); - - /* Handle the data that goes after the write to fill the block. */ - if (cbPostRead) - { - /* Now assemble the remaining data. */ - if (cbWriteCopy) - memcpy((char *)pvTmp + cbPreRead + cbThisWrite, - (char *)pvBuf + cbThisWrite, cbWriteCopy); - /* Zero out the remainder of this block. Will never be visible, as this - * is beyond the limit of the image. */ - if (cbFill) - memset((char *)pvTmp + cbPreRead + cbThisWrite + cbWriteCopy + cbReadImage, - '\0', cbFill); + { + VDIOCTX IoCtx; + vdIoCtxInit(&IoCtx, pDisk, VDIOCTXTXDIR_FLUSH, 0, 0, NULL, + NULL, NULL, NULL, VDIOCTX_FLAGS_SYNC); + pDisk->pLast->Backend->pfnFlush(pDisk->pLast->pBackendData, &IoCtx); + } } - - /* Write the full block to the virtual disk. 
*/ - rc = pImage->Backend->pfnWrite(pImage->pBackendData, - uOffset - cbPreRead, pvTmp, - cbPreRead + cbThisWrite + cbPostRead, - NULL, &cbPreRead, &cbPostRead, 0); - Assert(rc != VERR_VD_BLOCK_FREE); - Assert(cbPreRead == 0); - Assert(cbPostRead == 0); - - return rc; } /** @@ -2282,76 +2042,27 @@ static int vdWriteHelperOptimized(PVBOXHDD pDisk, PVDIMAGE pImage, static int vdWriteHelperEx(PVBOXHDD pDisk, PVDIMAGE pImage, PVDIMAGE pImageParentOverride, uint64_t uOffset, const void *pvBuf, size_t cbWrite, - bool fUpdateCache, unsigned cImagesRead) + uint32_t fFlags, unsigned cImagesRead) { - int rc; - unsigned fWrite; - size_t cbThisWrite; - size_t cbPreRead, cbPostRead; - uint64_t uOffsetCur = uOffset; - size_t cbWriteCur = cbWrite; - const void *pcvBufCur = pvBuf; + RTSGSEG Segment; + RTSGBUF SgBuf; + VDIOCTX IoCtx; - /* Loop until all written. */ - do - { - /* Try to write the possibly partial block to the last opened image. - * This works when the block is already allocated in this image or - * if it is a full-block write (and allocation isn't suppressed below). - * For image formats which don't support zero blocks, it's beneficial - * to avoid unnecessarily allocating unchanged blocks. This prevents - * unwanted expanding of images. VMDK is an example. */ - cbThisWrite = cbWriteCur; - fWrite = (pImage->uOpenFlags & VD_OPEN_FLAGS_HONOR_SAME) - ? 0 : VD_WRITE_NO_ALLOC; - rc = pImage->Backend->pfnWrite(pImage->pBackendData, uOffsetCur, pcvBufCur, - cbThisWrite, &cbThisWrite, &cbPreRead, - &cbPostRead, fWrite); - if (rc == VERR_VD_BLOCK_FREE) - { - void *pvTmp = RTMemTmpAlloc(cbPreRead + cbThisWrite + cbPostRead); - AssertBreakStmt(VALID_PTR(pvTmp), rc = VERR_NO_MEMORY); + fFlags |= VDIOCTX_FLAGS_SYNC | VDIOCTX_FLAGS_DONT_FREE; - if (!(pImage->uOpenFlags & VD_OPEN_FLAGS_HONOR_SAME)) - { - /* Optimized write, suppress writing to a so far unallocated - * block if the data is in fact not changed. 
*/ - rc = vdWriteHelperOptimized(pDisk, pImage, pImageParentOverride, - uOffsetCur, cbWriteCur, - cbThisWrite, cbPreRead, cbPostRead, - pcvBufCur, pvTmp, cImagesRead); - } - else - { - /* Normal write, not optimized in any way. The block will - * be written no matter what. This will usually (unless the - * backend has some further optimization enabled) cause the - * block to be allocated. */ - rc = vdWriteHelperStandard(pDisk, pImage, pImageParentOverride, - uOffsetCur, cbWriteCur, - cbThisWrite, cbPreRead, cbPostRead, - pcvBufCur, pvTmp); - } - RTMemTmpFree(pvTmp); - if (RT_FAILURE(rc)) - break; - } - - cbWriteCur -= cbThisWrite; - uOffsetCur += cbThisWrite; - pcvBufCur = (char *)pcvBufCur + cbThisWrite; - } while (cbWriteCur != 0 && RT_SUCCESS(rc)); + Segment.pvSeg = (void *)pvBuf; + Segment.cbSeg = cbWrite; + RTSgBufInit(&SgBuf, &Segment, 1); + vdIoCtxInit(&IoCtx, pDisk, VDIOCTXTXDIR_WRITE, uOffset, cbWrite, pImage, &SgBuf, + NULL, vdWriteHelperAsync, fFlags); - /* Update the cache on success */ - if ( RT_SUCCESS(rc) - && pDisk->pCache - && fUpdateCache) - rc = vdCacheWriteHelper(pDisk->pCache, uOffset, pvBuf, cbWrite, NULL); - - if (RT_SUCCESS(rc)) - rc = vdDiscardSetRangeAllocated(pDisk, uOffset, cbWrite); - - return rc; + IoCtx.Req.Io.pImageParentOverride = pImageParentOverride; + IoCtx.Req.Io.cImagesRead = cImagesRead; + IoCtx.pIoCtxParent = NULL; + IoCtx.Type.Root.pfnComplete = vdIoCtxSyncComplete; + IoCtx.Type.Root.pvUser1 = pDisk; + IoCtx.Type.Root.pvUser2 = NULL; + return vdIoCtxProcessSync(&IoCtx); } /** @@ -2359,10 +2070,10 @@ static int vdWriteHelperEx(PVBOXHDD pDisk, PVDIMAGE pImage, * write optimizations. 
*/ static int vdWriteHelper(PVBOXHDD pDisk, PVDIMAGE pImage, uint64_t uOffset, - const void *pvBuf, size_t cbWrite, bool fUpdateCache) + const void *pvBuf, size_t cbWrite, uint32_t fFlags) { return vdWriteHelperEx(pDisk, pImage, NULL, uOffset, pvBuf, cbWrite, - fUpdateCache, 0); + fFlags, 0); } /** @@ -2407,9 +2118,19 @@ static int vdCopyHelper(PVBOXHDD pDiskFrom, PVDIMAGE pImageFrom, PVBOXHDD pDiskT if (fBlockwiseCopy) { + RTSGSEG SegmentBuf; + RTSGBUF SgBuf; + VDIOCTX IoCtx; + + SegmentBuf.pvSeg = pvBuf; + SegmentBuf.cbSeg = VD_MERGE_BUFFER_SIZE; + RTSgBufInit(&SgBuf, &SegmentBuf, 1); + vdIoCtxInit(&IoCtx, pDiskFrom, VDIOCTXTXDIR_READ, 0, 0, NULL, + &SgBuf, NULL, NULL, VDIOCTX_FLAGS_SYNC); + /* Read the source data. */ rc = pImageFrom->Backend->pfnRead(pImageFrom->pBackendData, - uOffset, pvBuf, cbThisRead, + uOffset, cbThisRead, &IoCtx, &cbThisRead); if ( rc == VERR_VD_BLOCK_FREE @@ -2422,8 +2143,8 @@ static int vdCopyHelper(PVBOXHDD pDiskFrom, PVDIMAGE pImageFrom, PVBOXHDD pDiskT pCurrImage = pCurrImage->pPrev) { rc = pCurrImage->Backend->pfnRead(pCurrImage->pBackendData, - uOffset, pvBuf, cbThisRead, - &cbThisRead); + uOffset, cbThisRead, + &IoCtx, &cbThisRead); if (cImagesToProcess == 1) break; else if (cImagesToProcess > 0) @@ -2450,7 +2171,7 @@ static int vdCopyHelper(PVBOXHDD pDiskFrom, PVDIMAGE pImageFrom, PVBOXHDD pDiskT /* Only do collapsed I/O if we are copying the data blockwise. */ rc = vdWriteHelperEx(pDiskTo, pDiskTo->pLast, NULL, uOffset, pvBuf, - cbThisRead, false /* fUpdateCache */, + cbThisRead, VDIOCTX_FLAGS_DONT_SET_MODIFIED_FLAG /* fFlags */, fBlockwiseCopy ? 
cImagesToRead : 0); if (RT_FAILURE(rc)) break; @@ -2514,7 +2235,7 @@ static int vdSetModifiedHelperAsync(PVDIOCTX pIoCtx) PVBOXHDD pDisk = pIoCtx->pDisk; PVDIMAGE pImage = pIoCtx->Req.Io.pImageCur; - rc = pImage->Backend->pfnAsyncFlush(pImage->pBackendData, pIoCtx); + rc = pImage->Backend->pfnFlush(pImage->pBackendData, pIoCtx); if (rc == VERR_VD_ASYNC_IO_IN_PROGRESS) rc = VINF_SUCCESS; @@ -2528,6 +2249,8 @@ static int vdSetModifiedFlagAsync(PVBOXHDD pDisk, PVDIOCTX pIoCtx) { int rc = VINF_SUCCESS; + VD_IS_LOCKED(pDisk); + pDisk->uModified |= VD_IMAGE_MODIFIED_FLAG; if (pDisk->uModified & VD_IMAGE_MODIFIED_FIRST) { @@ -2548,7 +2271,7 @@ static int vdSetModifiedFlagAsync(PVBOXHDD pDisk, PVDIOCTX pIoCtx) if (pIoCtxFlush) { - rc = vdIoCtxProcess(pIoCtxFlush); + rc = vdIoCtxProcessLocked(pIoCtxFlush); if (rc == VINF_VD_ASYNC_IO_FINISHED) { vdIoCtxUnlockDisk(pDisk, pIoCtx, false /* fProcessDeferredReqs */); @@ -2557,7 +2280,7 @@ static int vdSetModifiedFlagAsync(PVBOXHDD pDisk, PVDIOCTX pIoCtx) else if (rc == VERR_VD_ASYNC_IO_IN_PROGRESS) { ASMAtomicIncU32(&pIoCtx->cDataTransfersPending); - pIoCtx->fBlocked = true; + pIoCtx->fFlags |= VDIOCTX_FLAGS_BLOCKED; } else /* Another error */ vdIoCtxFree(pDisk, pIoCtxFlush); @@ -2571,101 +2294,7 @@ static int vdSetModifiedFlagAsync(PVBOXHDD pDisk, PVDIOCTX pIoCtx) return rc; } -/** - * internal: write a complete block (only used for diff images), taking the - * remaining data from parent images. This implementation does not optimize - * anything (except that it tries to read only that portions from parent - * images that are really needed) - async version. - */ -static int vdWriteHelperStandardAsync(PVDIOCTX pIoCtx) -{ - int rc = VINF_SUCCESS; - -#if 0 - - /* Read the data that goes before the write to fill the block. */ - if (cbPreRead) - { - rc = vdReadHelperAsync(pIoCtxDst); - if (RT_FAILURE(rc)) - return rc; - } - - /* Copy the data to the right place in the buffer. 
*/ - vdIoCtxCopy(pIoCtxDst, pIoCtxSrc, cbThisWrite); - - /* Read the data that goes after the write to fill the block. */ - if (cbPostRead) - { - /* If we have data to be written, use that instead of reading - * data from the image. */ - size_t cbWriteCopy; - if (cbWrite > cbThisWrite) - cbWriteCopy = RT_MIN(cbWrite - cbThisWrite, cbPostRead); - else - cbWriteCopy = 0; - /* Figure out how much we cannot read from the image, because - * the last block to write might exceed the nominal size of the - * image for technical reasons. */ - size_t cbFill; - if (uOffset + cbThisWrite + cbPostRead > pDisk->cbSize) - cbFill = uOffset + cbThisWrite + cbPostRead - pDisk->cbSize; - else - cbFill = 0; - /* The rest must be read from the image. */ - size_t cbReadImage = cbPostRead - cbWriteCopy - cbFill; - - /* Now assemble the remaining data. */ - if (cbWriteCopy) - { - vdIoCtxCopy(pIoCtxDst, pIoCtxSrc, cbWriteCopy); - ASMAtomicSubU32(&pIoCtxDst->cbTransferLeft, cbWriteCopy); - } - - if (cbReadImage) - rc = vdReadHelperAsync(pDisk, pImage, pImageParentOverride, pIoCtxDst, - uOffset + cbThisWrite + cbWriteCopy, - cbReadImage); - if (RT_FAILURE(rc)) - return rc; - /* Zero out the remainder of this block. Will never be visible, as this - * is beyond the limit of the image. */ - if (cbFill) - { - vdIoCtxSet(pIoCtxDst, '\0', cbFill); - ASMAtomicSubU32(&pIoCtxDst->cbTransferLeft, cbFill); - } - } - - if ( !pIoCtxDst->cbTransferLeft - && !pIoCtxDst->cMetaTransfersPending - && ASMAtomicCmpXchgBool(&pIoCtxDst->fComplete, true, false)) - { - /* Write the full block to the virtual disk. 
*/ - vdIoCtxChildReset(pIoCtxDst); - rc = pImage->Backend->pfnAsyncWrite(pImage->pBackendData, - uOffset - cbPreRead, - cbPreRead + cbThisWrite + cbPostRead, - pIoCtxDst, - NULL, &cbPreRead, &cbPostRead, 0); - Assert(rc != VERR_VD_BLOCK_FREE); - Assert(cbPreRead == 0); - Assert(cbPostRead == 0); - } - else - { - LogFlow(("cbTransferLeft=%u cMetaTransfersPending=%u fComplete=%RTbool\n", - pIoCtxDst->cbTransferLeft, pIoCtxDst->cMetaTransfersPending, - pIoCtxDst->fComplete)); - rc = VERR_VD_ASYNC_IO_IN_PROGRESS; - } - - return rc; -#endif - return VERR_NOT_IMPLEMENTED; -} - -static int vdWriteHelperOptimizedCommitAsync(PVDIOCTX pIoCtx) +static int vdWriteHelperCommitAsync(PVDIOCTX pIoCtx) { int rc = VINF_SUCCESS; PVDIMAGE pImage = pIoCtx->Req.Io.pImageStart; @@ -2674,10 +2303,10 @@ static int vdWriteHelperOptimizedCommitAsync(PVDIOCTX pIoCtx) size_t cbThisWrite = pIoCtx->Type.Child.cbTransferParent; LogFlowFunc(("pIoCtx=%#p\n", pIoCtx)); - rc = pImage->Backend->pfnAsyncWrite(pImage->pBackendData, - pIoCtx->Req.Io.uOffset - cbPreRead, - cbPreRead + cbThisWrite + cbPostRead, - pIoCtx, NULL, &cbPreRead, &cbPostRead, 0); + rc = pImage->Backend->pfnWrite(pImage->pBackendData, + pIoCtx->Req.Io.uOffset - cbPreRead, + cbPreRead + cbThisWrite + cbPostRead, + pIoCtx, NULL, &cbPreRead, &cbPostRead, 0); Assert(rc != VERR_VD_BLOCK_FREE); Assert(rc == VERR_VD_NOT_ENOUGH_METADATA || cbPreRead == 0); Assert(rc == VERR_VD_NOT_ENOUGH_METADATA || cbPostRead == 0); @@ -2685,7 +2314,7 @@ static int vdWriteHelperOptimizedCommitAsync(PVDIOCTX pIoCtx) rc = VINF_SUCCESS; else if (rc == VERR_VD_IOCTX_HALT) { - pIoCtx->fBlocked = true; + pIoCtx->fFlags |= VDIOCTX_FLAGS_BLOCKED; rc = VINF_SUCCESS; } @@ -2766,7 +2395,7 @@ static int vdWriteHelperOptimizedCmpAndWriteAsync(PVDIOCTX pIoCtx) /* Write the full block to the virtual disk. 
*/ RTSgBufReset(&pIoCtx->Req.Io.SgBuf); - pIoCtx->pfnIoCtxTransferNext = vdWriteHelperOptimizedCommitAsync; + pIoCtx->pfnIoCtxTransferNext = vdWriteHelperCommitAsync; return rc; } @@ -2777,7 +2406,10 @@ static int vdWriteHelperOptimizedPreReadAsync(PVDIOCTX pIoCtx) LogFlowFunc(("pIoCtx=%#p\n", pIoCtx)); - if (pIoCtx->Req.Io.cbTransferLeft) + pIoCtx->fFlags |= VDIOCTX_FLAGS_ZERO_FREE_BLOCKS; + + if ( pIoCtx->Req.Io.cbTransferLeft + && !pIoCtx->cDataTransfersPending) rc = vdReadHelperAsync(pIoCtx); if ( RT_SUCCESS(rc) @@ -2836,7 +2468,8 @@ static int vdWriteHelperOptimizedAsync(PVDIOCTX pIoCtx) /* Read the entire data of the block so that we can compare whether it will * be modified by the write or not. */ - pIoCtx->Req.Io.cbTransferLeft = cbPreRead + cbThisWrite + cbPostRead - cbFill; + size_t cbTmp = cbPreRead + cbThisWrite + cbPostRead - cbFill; Assert(cbTmp == (uint32_t)cbTmp); + pIoCtx->Req.Io.cbTransferLeft = (uint32_t)cbTmp; pIoCtx->Req.Io.cbTransfer = pIoCtx->Req.Io.cbTransferLeft; pIoCtx->Req.Io.uOffset -= cbPreRead; @@ -2845,6 +2478,141 @@ static int vdWriteHelperOptimizedAsync(PVDIOCTX pIoCtx) return VINF_SUCCESS; } +static int vdWriteHelperStandardAssemble(PVDIOCTX pIoCtx) +{ + int rc = VINF_SUCCESS; + size_t cbPostRead = pIoCtx->Type.Child.cbPostRead; + size_t cbThisWrite = pIoCtx->Type.Child.cbTransferParent; + PVDIOCTX pIoCtxParent = pIoCtx->pIoCtxParent; + + LogFlowFunc(("pIoCtx=%#p\n", pIoCtx)); + + vdIoCtxCopy(pIoCtx, pIoCtxParent, cbThisWrite); + if (cbPostRead) + { + size_t cbFill = pIoCtx->Type.Child.Write.Optimized.cbFill; + size_t cbWriteCopy = pIoCtx->Type.Child.Write.Optimized.cbWriteCopy; + size_t cbReadImage = pIoCtx->Type.Child.Write.Optimized.cbReadImage; + + /* Now assemble the remaining data. */ + if (cbWriteCopy) + { + /* + * The S/G buffer of the parent needs to be cloned because + * it is not allowed to modify the state. 
+ */ + RTSGBUF SgBufParentTmp; + + RTSgBufClone(&SgBufParentTmp, &pIoCtxParent->Req.Io.SgBuf); + RTSgBufCopy(&pIoCtx->Req.Io.SgBuf, &SgBufParentTmp, cbWriteCopy); + } + + /* Zero out the remainder of this block. Will never be visible, as this + * is beyond the limit of the image. */ + if (cbFill) + { + RTSgBufAdvance(&pIoCtx->Req.Io.SgBuf, cbReadImage); + vdIoCtxSet(pIoCtx, '\0', cbFill); + } + + if (cbReadImage) + { + /* Read remaining data. */ + } + else + { + /* Write the full block to the virtual disk. */ + RTSgBufReset(&pIoCtx->Req.Io.SgBuf); + pIoCtx->pfnIoCtxTransferNext = vdWriteHelperCommitAsync; + } + } + else + { + /* Write the full block to the virtual disk. */ + RTSgBufReset(&pIoCtx->Req.Io.SgBuf); + pIoCtx->pfnIoCtxTransferNext = vdWriteHelperCommitAsync; + } + + return rc; +} + +static int vdWriteHelperStandardPreReadAsync(PVDIOCTX pIoCtx) +{ + int rc = VINF_SUCCESS; + + LogFlowFunc(("pIoCtx=%#p\n", pIoCtx)); + + pIoCtx->fFlags |= VDIOCTX_FLAGS_ZERO_FREE_BLOCKS; + + if (pIoCtx->Req.Io.cbTransferLeft) + rc = vdReadHelperAsync(pIoCtx); + + if ( RT_SUCCESS(rc) + && ( pIoCtx->Req.Io.cbTransferLeft + || pIoCtx->cMetaTransfersPending)) + rc = VERR_VD_ASYNC_IO_IN_PROGRESS; + else + pIoCtx->pfnIoCtxTransferNext = vdWriteHelperStandardAssemble; + + return rc; +} + +static int vdWriteHelperStandardAsync(PVDIOCTX pIoCtx) +{ + PVBOXHDD pDisk = pIoCtx->pDisk; + uint64_t uOffset = pIoCtx->Type.Child.uOffsetSaved; + size_t cbThisWrite = pIoCtx->Type.Child.cbTransferParent; + size_t cbPreRead = pIoCtx->Type.Child.cbPreRead; + size_t cbPostRead = pIoCtx->Type.Child.cbPostRead; + size_t cbWrite = pIoCtx->Type.Child.cbWriteParent; + size_t cbFill = 0; + size_t cbWriteCopy = 0; + size_t cbReadImage = 0; + + LogFlowFunc(("pIoCtx=%#p\n", pIoCtx)); + + AssertPtr(pIoCtx->pIoCtxParent); + Assert(!pIoCtx->pIoCtxParent->pIoCtxParent); + + /* Calculate the amount of data to read that goes after the write to fill the block. 
*/ + if (cbPostRead) + { + /* If we have data to be written, use that instead of reading + * data from the image. */ + cbWriteCopy; + if (cbWrite > cbThisWrite) + cbWriteCopy = RT_MIN(cbWrite - cbThisWrite, cbPostRead); + + /* Figure out how much we cannot read from the image, because + * the last block to write might exceed the nominal size of the + * image for technical reasons. */ + if (uOffset + cbThisWrite + cbPostRead > pDisk->cbSize) + cbFill = uOffset + cbThisWrite + cbPostRead - pDisk->cbSize; + + /* The rest must be read from the image. */ + cbReadImage = cbPostRead - cbWriteCopy - cbFill; + } + + pIoCtx->Type.Child.Write.Optimized.cbFill = cbFill; + pIoCtx->Type.Child.Write.Optimized.cbWriteCopy = cbWriteCopy; + pIoCtx->Type.Child.Write.Optimized.cbReadImage = cbReadImage; + + /* Next step */ + if (cbPreRead) + { + pIoCtx->pfnIoCtxTransferNext = vdWriteHelperStandardPreReadAsync; + + /* Read the data that goes before the write to fill the block. */ + pIoCtx->Req.Io.cbTransferLeft = (uint32_t)cbPreRead; Assert(cbPreRead == (uint32_t)cbPreRead); + pIoCtx->Req.Io.cbTransfer = pIoCtx->Req.Io.cbTransferLeft; + pIoCtx->Req.Io.uOffset -= cbPreRead; + } + else + pIoCtx->pfnIoCtxTransferNext = vdWriteHelperStandardAssemble; + + return VINF_SUCCESS; +} + /** * internal: write buffer to the image, taking care of block boundaries and * write optimizations - async version. @@ -2860,9 +2628,12 @@ static int vdWriteHelperAsync(PVDIOCTX pIoCtx) size_t cbThisWrite; size_t cbPreRead, cbPostRead; - rc = vdSetModifiedFlagAsync(pDisk, pIoCtx); - if (RT_FAILURE(rc)) /* Includes I/O in progress. */ - return rc; + if (!(pIoCtx->fFlags & VDIOCTX_FLAGS_DONT_SET_MODIFIED_FLAG)) + { + rc = vdSetModifiedFlagAsync(pDisk, pIoCtx); + if (RT_FAILURE(rc)) /* Includes I/O in progress. 
*/ + return rc; + } rc = vdDiscardSetRangeAllocated(pDisk, uOffset, cbWrite); if (RT_FAILURE(rc)) @@ -2880,7 +2651,7 @@ static int vdWriteHelperAsync(PVDIOCTX pIoCtx) cbThisWrite = cbWrite; fWrite = (pImage->uOpenFlags & VD_OPEN_FLAGS_HONOR_SAME) ? 0 : VD_WRITE_NO_ALLOC; - rc = pImage->Backend->pfnAsyncWrite(pImage->pBackendData, uOffset, + rc = pImage->Backend->pfnWrite(pImage->pBackendData, uOffset, cbThisWrite, pIoCtx, &cbThisWrite, &cbPreRead, &cbPostRead, fWrite); @@ -2923,9 +2694,10 @@ static int vdWriteHelperAsync(PVDIOCTX pIoCtx) pIoCtxWrite->Type.Child.cbPreRead = cbPreRead; pIoCtxWrite->Type.Child.cbPostRead = cbPostRead; + pIoCtxWrite->Req.Io.pImageParentOverride = pIoCtx->Req.Io.pImageParentOverride; /* Process the write request */ - rc = vdIoCtxProcess(pIoCtxWrite); + rc = vdIoCtxProcessLocked(pIoCtxWrite); if (RT_FAILURE(rc) && (rc != VERR_VD_ASYNC_IO_IN_PROGRESS)) { @@ -2937,7 +2709,8 @@ static int vdWriteHelperAsync(PVDIOCTX pIoCtx) { LogFlow(("Child write request completed\n")); Assert(pIoCtx->Req.Io.cbTransferLeft >= cbThisWrite); - ASMAtomicSubU32(&pIoCtx->Req.Io.cbTransferLeft, cbThisWrite); + Assert(cbThisWrite == (uint32_t)cbThisWrite); + ASMAtomicSubU32(&pIoCtx->Req.Io.cbTransferLeft, (uint32_t)cbThisWrite); vdIoCtxUnlockDisk(pDisk, pIoCtx, false /* fProcessDeferredReqs*/ ); vdIoCtxFree(pDisk, pIoCtxWrite); @@ -2947,7 +2720,7 @@ static int vdWriteHelperAsync(PVDIOCTX pIoCtx) { LogFlow(("Child write pending\n")); ASMAtomicIncU32(&pIoCtx->cDataTransfersPending); - pIoCtx->fBlocked = true; + pIoCtx->fFlags |= VDIOCTX_FLAGS_BLOCKED; rc = VERR_VD_ASYNC_IO_IN_PROGRESS; cbWrite -= cbThisWrite; uOffset += cbThisWrite; @@ -2965,7 +2738,7 @@ static int vdWriteHelperAsync(PVDIOCTX pIoCtx) { cbWrite -= cbThisWrite; uOffset += cbThisWrite; - pIoCtx->fBlocked = true; + pIoCtx->fFlags |= VDIOCTX_FLAGS_BLOCKED; break; } else if (rc == VERR_VD_NOT_ENOUGH_METADATA) @@ -3007,11 +2780,24 @@ static int vdFlushHelperAsync(PVDIOCTX pIoCtx) if (RT_SUCCESS(rc)) { 
vdResetModifiedFlag(pDisk); - rc = pImage->Backend->pfnAsyncFlush(pImage->pBackendData, pIoCtx); - if (rc == VERR_VD_ASYNC_IO_IN_PROGRESS) + rc = pImage->Backend->pfnFlush(pImage->pBackendData, pIoCtx); + if ( ( RT_SUCCESS(rc) + || rc == VERR_VD_ASYNC_IO_IN_PROGRESS + || rc == VERR_VD_IOCTX_HALT) + && pDisk->pCache) + { + rc = pDisk->pCache->Backend->pfnFlush(pDisk->pCache->pBackendData, pIoCtx); + if ( RT_SUCCESS(rc) + || ( rc != VERR_VD_ASYNC_IO_IN_PROGRESS + && rc != VERR_VD_IOCTX_HALT)) + vdIoCtxUnlockDisk(pDisk, pIoCtx, true /* fProcessBlockedReqs */); + else if (rc != VERR_VD_IOCTX_HALT) + rc = VINF_SUCCESS; + } + else if (rc == VERR_VD_ASYNC_IO_IN_PROGRESS) rc = VINF_SUCCESS; - else if (rc == VINF_VD_ASYNC_IO_FINISHED) - vdIoCtxUnlockDisk(pDisk, pIoCtx, true /* fProcessDeferredReqs */); + else if (rc != VERR_VD_IOCTX_HALT)/* Some other error. */ + vdIoCtxUnlockDisk(pDisk, pIoCtx, true /* fProcessBlockedReqs */); } return rc; @@ -3036,7 +2822,7 @@ static int vdDiscardWholeBlockAsync(PVDIOCTX pIoCtx) AssertPtr(pBlock); - rc = pDisk->pLast->Backend->pfnAsyncDiscard(pDisk->pLast->pBackendData, pIoCtx, + rc = pDisk->pLast->Backend->pfnDiscard(pDisk->pLast->pBackendData, pIoCtx, pBlock->Core.Key, pBlock->cbDiscard, &cbPreAllocated, &cbPostAllocated, &cbActuallyDiscarded, NULL, 0); @@ -3094,7 +2880,7 @@ static int vdDiscardRemoveBlocksAsync(PVBOXHDD pDisk, PVDIOCTX pIoCtx, size_t cb uint32_t idxStart = 0; size_t cbLeft = pBlock->cbDiscard; bool fAllocated = ASMBitTest(pBlock->pbmAllocated, idxStart); - uint32_t cSectors = pBlock->cbDiscard / 512; + uint32_t cSectors = (uint32_t)(pBlock->cbDiscard / 512); while (cbLeft > 0) { @@ -3118,7 +2904,7 @@ static int vdDiscardRemoveBlocksAsync(PVBOXHDD pDisk, PVDIOCTX pIoCtx, size_t cb if (idxEnd != -1) cbThis = (idxEnd - idxStart) * 512; - rc = pDisk->pLast->Backend->pfnAsyncDiscard(pDisk->pLast->pBackendData, pIoCtx, + rc = pDisk->pLast->Backend->pfnDiscard(pDisk->pLast->pBackendData, pIoCtx, offStart, cbThis, NULL, NULL, 
&cbThis, NULL, VD_DISCARD_MARK_UNUSED); if ( RT_FAILURE(rc) @@ -3175,7 +2961,7 @@ static int vdDiscardCurrentRangeAsync(PVDIOCTX pIoCtx) LogFlowFunc(("pIoCtx=%#p\n", pIoCtx)); /* No block found, try to discard using the backend first. */ - rc = pDisk->pLast->Backend->pfnAsyncDiscard(pDisk->pLast->pBackendData, pIoCtx, + rc = pDisk->pLast->Backend->pfnDiscard(pDisk->pLast->pBackendData, pIoCtx, offStart, cbThisDiscard, &cbPreAllocated, &cbPostAllocated, &cbThisDiscard, &pbmAllocated, 0); @@ -3310,7 +3096,7 @@ static int vdDiscardHelperAsync(PVDIOCTX pIoCtx) Assert(!((offStart - pBlock->Core.Key) % 512)); idxStart = (offStart - pBlock->Core.Key) / 512; - idxEnd = idxStart + (cbThisDiscard / 512); + idxEnd = idxStart + (int32_t)(cbThisDiscard / 512); ASMBitClearRange(pBlock->pbmAllocated, idxStart, idxEnd); @@ -3318,7 +3104,7 @@ static int vdDiscardHelperAsync(PVDIOCTX pIoCtx) offStart += cbThisDiscard; /* Call the backend to discard the block if it is completely unallocated now. */ - if (ASMBitFirstSet((volatile void *)pBlock->pbmAllocated, pBlock->cbDiscard / 512) == -1) + if (ASMBitFirstSet((volatile void *)pBlock->pbmAllocated, (uint32_t)(pBlock->cbDiscard / 512)) == -1) { pIoCtx->Req.Discard.pBlock = pBlock; pIoCtx->pfnIoCtxTransferNext = vdDiscardWholeBlockAsync; @@ -3745,15 +3531,15 @@ static int vdIoCtxContinue(PVDIOCTX pIoCtx, int rcReq) PVBOXHDD pDisk = pIoCtx->pDisk; int rc = VINF_SUCCESS; - VD_THREAD_IS_CRITSECT_OWNER(pDisk); + VD_IS_LOCKED(pDisk); if (RT_FAILURE(rcReq)) ASMAtomicCmpXchgS32(&pIoCtx->rcReq, rcReq, VINF_SUCCESS); - if (!pIoCtx->fBlocked) + if (!(pIoCtx->fFlags & VDIOCTX_FLAGS_BLOCKED)) { /* Continue the transfer */ - rc = vdIoCtxProcess(pIoCtx); + rc = vdIoCtxProcessLocked(pIoCtx); if ( rc == VINF_VD_ASYNC_IO_FINISHED && ASMAtomicCmpXchgBool(&pIoCtx->fComplete, true, false)) @@ -3776,7 +3562,7 @@ static int vdIoCtxContinue(PVDIOCTX pIoCtx, int rcReq) /* Update the parent state. 
*/ Assert(pIoCtxParent->Req.Io.cbTransferLeft >= pIoCtx->Type.Child.cbTransferParent); - ASMAtomicSubU32(&pIoCtxParent->Req.Io.cbTransferLeft, pIoCtx->Type.Child.cbTransferParent); + ASMAtomicSubU32(&pIoCtxParent->Req.Io.cbTransferLeft, (uint32_t)pIoCtx->Type.Child.cbTransferParent); } else Assert(pIoCtx->enmTxDir == VDIOCTXTXDIR_FLUSH); @@ -3788,75 +3574,29 @@ static int vdIoCtxContinue(PVDIOCTX pIoCtx, int rcReq) vdIoCtxUnlockDisk(pDisk, pIoCtxParent, false /* fProcessDeferredReqs */); /* Unblock the parent */ - pIoCtxParent->fBlocked = false; + pIoCtxParent->fFlags &= ~VDIOCTX_FLAGS_BLOCKED; - rc = vdIoCtxProcess(pIoCtxParent); + rc = vdIoCtxProcessLocked(pIoCtxParent); if ( rc == VINF_VD_ASYNC_IO_FINISHED && ASMAtomicCmpXchgBool(&pIoCtxParent->fComplete, true, false)) { - RTCritSectLeave(&pDisk->CritSect); LogFlowFunc(("Parent I/O context completed pIoCtxParent=%#p rcReq=%Rrc\n", pIoCtxParent, pIoCtxParent->rcReq)); pIoCtxParent->Type.Root.pfnComplete(pIoCtxParent->Type.Root.pvUser1, pIoCtxParent->Type.Root.pvUser2, pIoCtxParent->rcReq); vdThreadFinishWrite(pDisk); vdIoCtxFree(pDisk, pIoCtxParent); - RTCritSectEnter(&pDisk->CritSect); + vdDiskProcessBlockedIoCtx(pDisk); } - - /* Process any pending writes if the current request didn't caused another growing. */ - if ( !RTListIsEmpty(&pDisk->ListWriteLocked) - && !vdIoCtxIsDiskLockOwner(pDisk, pIoCtx)) + else if (!vdIoCtxIsDiskLockOwner(pDisk, pIoCtx)) { - RTLISTNODE ListTmp; - - LogFlowFunc(("Before: pNext=%#p pPrev=%#p\n", pDisk->ListWriteLocked.pNext, - pDisk->ListWriteLocked.pPrev)); - - RTListMove(&ListTmp, &pDisk->ListWriteLocked); - - LogFlowFunc(("After: pNext=%#p pPrev=%#p\n", pDisk->ListWriteLocked.pNext, - pDisk->ListWriteLocked.pPrev)); - - RTCritSectLeave(&pDisk->CritSect); - - /* Process the list. 
*/ - do - { - PVDIOCTXDEFERRED pDeferred = RTListGetFirst(&ListTmp, VDIOCTXDEFERRED, NodeDeferred); - PVDIOCTX pIoCtxWait = pDeferred->pIoCtx; - - AssertPtr(pIoCtxWait); - - RTListNodeRemove(&pDeferred->NodeDeferred); - RTMemFree(pDeferred); - - Assert(!pIoCtxWait->pIoCtxParent); - - pIoCtxWait->fBlocked = false; - LogFlowFunc(("Processing waiting I/O context pIoCtxWait=%#p\n", pIoCtxWait)); - - rc = vdIoCtxProcess(pIoCtxWait); - if ( rc == VINF_VD_ASYNC_IO_FINISHED - && ASMAtomicCmpXchgBool(&pIoCtxWait->fComplete, true, false)) - { - LogFlowFunc(("Waiting I/O context completed pIoCtxWait=%#p\n", pIoCtxWait)); - vdThreadFinishWrite(pDisk); - pIoCtxWait->Type.Root.pfnComplete(pIoCtxWait->Type.Root.pvUser1, - pIoCtxWait->Type.Root.pvUser2, - pIoCtxWait->rcReq); - vdIoCtxFree(pDisk, pIoCtxWait); - } - } while (!RTListIsEmpty(&ListTmp)); - - RTCritSectEnter(&pDisk->CritSect); + /* Process any pending writes if the current request didn't caused another growing. */ + vdDiskProcessBlockedIoCtx(pDisk); } } else { - RTCritSectLeave(&pDisk->CritSect); - if (pIoCtx->enmTxDir == VDIOCTXTXDIR_FLUSH) { vdIoCtxUnlockDisk(pDisk, pIoCtx, true /* fProcessDerredReqs */); @@ -3875,7 +3615,6 @@ static int vdIoCtxContinue(PVDIOCTX pIoCtx, int rcReq) pIoCtx->Type.Root.pfnComplete(pIoCtx->Type.Root.pvUser1, pIoCtx->Type.Root.pvUser2, pIoCtx->rcReq); - RTCritSectEnter(&pDisk->CritSect); } vdIoCtxFree(pDisk, pIoCtx); @@ -3899,9 +3638,10 @@ static int vdUserXferCompleted(PVDIOSTORAGE pIoStorage, PVDIOCTX pIoCtx, LogFlowFunc(("pIoStorage=%#p pIoCtx=%#p pfnComplete=%#p pvUser=%#p cbTransfer=%zu rcReq=%Rrc\n", pIoStorage, pIoCtx, pfnComplete, pvUser, cbTransfer, rcReq)); - RTCritSectEnter(&pDisk->CritSect); + VD_IS_LOCKED(pDisk); + Assert(pIoCtx->Req.Io.cbTransferLeft >= cbTransfer); - ASMAtomicSubU32(&pIoCtx->Req.Io.cbTransferLeft, cbTransfer); + ASMAtomicSubU32(&pIoCtx->Req.Io.cbTransferLeft, (uint32_t)cbTransfer); Assert(cbTransfer == (uint32_t)cbTransfer); 
ASMAtomicDecU32(&pIoCtx->cDataTransfersPending); if (pfnComplete) @@ -3912,8 +3652,6 @@ static int vdUserXferCompleted(PVDIOSTORAGE pIoStorage, PVDIOCTX pIoCtx, else if (rc == VERR_VD_ASYNC_IO_IN_PROGRESS) rc = VINF_SUCCESS; - vdDiskCritSectLeave(pDisk, NULL); - return rc; } @@ -3930,7 +3668,8 @@ static int vdMetaXferCompleted(PVDIOSTORAGE pIoStorage, PFNVDXFERCOMPLETED pfnCo LogFlowFunc(("pIoStorage=%#p pfnComplete=%#p pvUser=%#p pMetaXfer=%#p rcReq=%Rrc\n", pIoStorage, pfnComplete, pvUser, pMetaXfer, rcReq)); - RTCritSectEnter(&pDisk->CritSect); + VD_IS_LOCKED(pDisk); + fFlush = VDMETAXFER_TXDIR_GET(pMetaXfer->fFlags) == VDMETAXFER_TXDIR_FLUSH; VDMETAXFER_TXDIR_SET(pMetaXfer->fFlags, VDMETAXFER_TXDIR_NONE); @@ -3997,33 +3736,187 @@ static int vdMetaXferCompleted(PVDIOSTORAGE pIoStorage, PFNVDXFERCOMPLETED pfnCo else if (fFlush) RTMemFree(pMetaXfer); - vdDiskCritSectLeave(pDisk, NULL); - return VINF_SUCCESS; } -static int vdIOIntReqCompleted(void *pvUser, int rcReq) +/** + * Processes a list of waiting I/O tasks. The disk lock must be held by caller. + * + * @returns nothing. + * @param pDisk The disk to process the list for. + */ +static void vdIoTaskProcessWaitingList(PVBOXHDD pDisk) +{ + LogFlowFunc(("pDisk=%#p\n", pDisk)); + + VD_IS_LOCKED(pDisk); + + PVDIOTASK pHead = ASMAtomicXchgPtrT(&pDisk->pIoTasksPendingHead, NULL, PVDIOTASK); + + Log(("I/O task list cleared\n")); + + /* Reverse order. 
*/ + PVDIOTASK pCur = pHead; + pHead = NULL; + while (pCur) + { + PVDIOTASK pInsert = pCur; + pCur = pCur->pNext; + pInsert->pNext = pHead; + pHead = pInsert; + } + + while (pHead) + { + PVDIOSTORAGE pIoStorage = pHead->pIoStorage; + + if (!pHead->fMeta) + vdUserXferCompleted(pIoStorage, pHead->Type.User.pIoCtx, + pHead->pfnComplete, pHead->pvUser, + pHead->Type.User.cbTransfer, pHead->rcReq); + else + vdMetaXferCompleted(pIoStorage, pHead->pfnComplete, pHead->pvUser, + pHead->Type.Meta.pMetaXfer, pHead->rcReq); + + pCur = pHead; + pHead = pHead->pNext; + vdIoTaskFree(pDisk, pCur); + } +} + +/** + * Process any I/O context on the halted list. + * + * @returns nothing. + * @param pDisk The disk. + */ +static void vdIoCtxProcessHaltedList(PVBOXHDD pDisk) +{ + LogFlowFunc(("pDisk=%#p\n", pDisk)); + + VD_IS_LOCKED(pDisk); + + /* Get the waiting list and process it in FIFO order. */ + PVDIOCTX pIoCtxHead = ASMAtomicXchgPtrT(&pDisk->pIoCtxHaltedHead, NULL, PVDIOCTX); + + /* Reverse it. */ + PVDIOCTX pCur = pIoCtxHead; + pIoCtxHead = NULL; + while (pCur) + { + PVDIOCTX pInsert = pCur; + pCur = pCur->pIoCtxNext; + pInsert->pIoCtxNext = pIoCtxHead; + pIoCtxHead = pInsert; + } + + /* Process now. */ + pCur = pIoCtxHead; + while (pCur) + { + PVDIOCTX pTmp = pCur; + + pCur = pCur->pIoCtxNext; + pTmp->pIoCtxNext = NULL; + + /* Continue */ + pTmp->fFlags &= ~VDIOCTX_FLAGS_BLOCKED; + vdIoCtxContinue(pTmp, pTmp->rcReq); + } +} + +/** + * Unlock the disk and process pending tasks. + * + * @returns VBox status code. + * @param pDisk The disk to unlock. 
+ */ +static int vdDiskUnlock(PVBOXHDD pDisk, PVDIOCTX pIoCtxRc) { int rc = VINF_SUCCESS; - PVDIOTASK pIoTask = (PVDIOTASK)pvUser; - PVDIOSTORAGE pIoStorage = pIoTask->pIoStorage; - LogFlowFunc(("Task completed pIoTask=%#p\n", pIoTask)); + VD_IS_LOCKED(pDisk); - if (!pIoTask->fMeta) - rc = vdUserXferCompleted(pIoStorage, pIoTask->Type.User.pIoCtx, - pIoTask->pfnComplete, pIoTask->pvUser, - pIoTask->Type.User.cbTransfer, rcReq); - else - rc = vdMetaXferCompleted(pIoStorage, pIoTask->pfnComplete, pIoTask->pvUser, - pIoTask->Type.Meta.pMetaXfer, rcReq); + /* + * Process the list of waiting I/O tasks first + * because they might complete I/O contexts. + * Same for the list of halted I/O contexts. + * Afterwards comes the list of new I/O contexts. + */ + vdIoTaskProcessWaitingList(pDisk); + vdIoCtxProcessHaltedList(pDisk); + rc = vdDiskProcessWaitingIoCtx(pDisk, pIoCtxRc); + ASMAtomicXchgBool(&pDisk->fLocked, false); - vdIoTaskFree(pIoStorage->pVDIo->pDisk, pIoTask); + /* + * Need to check for new I/O tasks and waiting I/O contexts now + * again as other threads might added them while we processed + * previous lists. + */ + while ( ASMAtomicUoReadPtrT(&pDisk->pIoCtxHead, PVDIOCTX) != NULL + || ASMAtomicUoReadPtrT(&pDisk->pIoTasksPendingHead, PVDIOTASK) != NULL + || ASMAtomicUoReadPtrT(&pDisk->pIoCtxHaltedHead, PVDIOCTX) != NULL) + { + /* Try lock disk again. */ + if (ASMAtomicCmpXchgBool(&pDisk->fLocked, true, false)) + { + vdIoTaskProcessWaitingList(pDisk); + vdIoCtxProcessHaltedList(pDisk); + vdDiskProcessWaitingIoCtx(pDisk, NULL); + ASMAtomicXchgBool(&pDisk->fLocked, false); + } + else /* Let the other thread everything when he unlocks the disk. */ + break; + } return rc; } /** + * Try to lock the disk to complete pressing of the I/O task. + * The completion is deferred if the disk is locked already. + * + * @returns nothing. + * @param pIoTask The I/O task to complete. 
+ */ +static void vdXferTryLockDiskDeferIoTask(PVDIOTASK pIoTask) +{ + PVDIOSTORAGE pIoStorage = pIoTask->pIoStorage; + PVBOXHDD pDisk = pIoStorage->pVDIo->pDisk; + + Log(("Deferring I/O task pIoTask=%p\n", pIoTask)); + + /* Put it on the waiting list. */ + PVDIOTASK pNext = ASMAtomicUoReadPtrT(&pDisk->pIoTasksPendingHead, PVDIOTASK); + PVDIOTASK pHeadOld; + pIoTask->pNext = pNext; + while (!ASMAtomicCmpXchgExPtr(&pDisk->pIoTasksPendingHead, pIoTask, pNext, &pHeadOld)) + { + pNext = pHeadOld; + Assert(pNext != pIoTask); + pIoTask->pNext = pNext; + ASMNopPause(); + } + + if (ASMAtomicCmpXchgBool(&pDisk->fLocked, true, false)) + { + /* Release disk lock, it will take care of processing all lists. */ + vdDiskUnlock(pDisk, NULL); + } +} + +static int vdIOIntReqCompleted(void *pvUser, int rcReq) +{ + PVDIOTASK pIoTask = (PVDIOTASK)pvUser; + + LogFlowFunc(("Task completed pIoTask=%#p\n", pIoTask)); + + pIoTask->rcReq = rcReq; + vdXferTryLockDiskDeferIoTask(pIoTask); + return VINF_SUCCESS; +} + +/** * VD I/O interface callback for opening a file. */ static int vdIOIntOpen(void *pvUser, const char *pszLocation, @@ -4068,16 +3961,15 @@ static int vdIOIntTreeMetaXferDestroy(PAVLRFOFFNODECORE pNode, void *pvUser) static int vdIOIntClose(void *pvUser, PVDIOSTORAGE pIoStorage) { - PVDIO pVDIo = (PVDIO)pvUser; - - int rc = pVDIo->pInterfaceIo->pfnClose(pVDIo->pInterfaceIo->Core.pvUser, - pIoStorage->pStorage); - AssertRC(rc); + int rc = VINF_SUCCESS; + PVDIO pVDIo = (PVDIO)pvUser; + /* We free everything here, even if closing the file failed for some reason. 
*/ + rc = pVDIo->pInterfaceIo->pfnClose(pVDIo->pInterfaceIo->Core.pvUser, pIoStorage->pStorage); RTAvlrFileOffsetDestroy(pIoStorage->pTreeMetaXfers, vdIOIntTreeMetaXferDestroy, NULL); RTMemFree(pIoStorage->pTreeMetaXfers); RTMemFree(pIoStorage); - return VINF_SUCCESS; + return rc; } static int vdIOIntDelete(void *pvUser, const char *pcszFilename) @@ -4127,41 +4019,8 @@ static int vdIOIntSetSize(void *pvUser, PVDIOSTORAGE pIoStorage, pIoStorage->pStorage, cbSize); } -static int vdIOIntWriteSync(void *pvUser, PVDIOSTORAGE pIoStorage, - uint64_t uOffset, const void *pvBuf, - size_t cbWrite, size_t *pcbWritten) -{ - PVDIO pVDIo = (PVDIO)pvUser; - return pVDIo->pInterfaceIo->pfnWriteSync(pVDIo->pInterfaceIo->Core.pvUser, - pIoStorage->pStorage, uOffset, - pvBuf, cbWrite, pcbWritten); -} - -static int vdIOIntReadSync(void *pvUser, PVDIOSTORAGE pIoStorage, - uint64_t uOffset, void *pvBuf, size_t cbRead, - size_t *pcbRead) -{ - PVDIO pVDIo = (PVDIO)pvUser; - return pVDIo->pInterfaceIo->pfnReadSync(pVDIo->pInterfaceIo->Core.pvUser, - pIoStorage->pStorage, uOffset, - pvBuf, cbRead, pcbRead); -} - -static int vdIOIntFlushSync(void *pvUser, PVDIOSTORAGE pIoStorage) -{ - int rc = VINF_SUCCESS; - PVDIO pVDIo = (PVDIO)pvUser; - - if (!pVDIo->fIgnoreFlush) - rc = pVDIo->pInterfaceIo->pfnFlushSync(pVDIo->pInterfaceIo->Core.pvUser, - pIoStorage->pStorage); - - return rc; -} - -static int vdIOIntReadUserAsync(void *pvUser, PVDIOSTORAGE pIoStorage, - uint64_t uOffset, PVDIOCTX pIoCtx, - size_t cbRead) +static int vdIOIntReadUser(void *pvUser, PVDIOSTORAGE pIoStorage, uint64_t uOffset, + PVDIOCTX pIoCtx, size_t cbRead) { int rc = VINF_SUCCESS; PVDIO pVDIo = (PVDIO)pvUser; @@ -4170,70 +4029,96 @@ static int vdIOIntReadUserAsync(void *pvUser, PVDIOSTORAGE pIoStorage, LogFlowFunc(("pvUser=%#p pIoStorage=%#p uOffset=%llu pIoCtx=%#p cbRead=%u\n", pvUser, pIoStorage, uOffset, pIoCtx, cbRead)); - VD_THREAD_IS_CRITSECT_OWNER(pDisk); + /** @todo: Enable check for sync I/O later. 
*/ + if (!(pIoCtx->fFlags & VDIOCTX_FLAGS_SYNC)) + VD_IS_LOCKED(pDisk); Assert(cbRead > 0); - /* Build the S/G array and spawn a new I/O task */ - while (cbRead) + if (pIoCtx->fFlags & VDIOCTX_FLAGS_SYNC) { - RTSGSEG aSeg[VD_IO_TASK_SEGMENTS_MAX]; - unsigned cSegments = VD_IO_TASK_SEGMENTS_MAX; - size_t cbTaskRead = 0; + RTSGSEG Seg; + unsigned cSegments = 1; + size_t cbTaskRead = 0; - cbTaskRead = RTSgBufSegArrayCreate(&pIoCtx->Req.Io.SgBuf, aSeg, &cSegments, cbRead); + /* Synchronous I/O contexts only have one buffer segment. */ + AssertMsgReturn(pIoCtx->Req.Io.SgBuf.cSegs == 1, + ("Invalid number of buffer segments for synchronous I/O context"), + VERR_INVALID_PARAMETER); + + cbTaskRead = RTSgBufSegArrayCreate(&pIoCtx->Req.Io.SgBuf, &Seg, &cSegments, cbRead); + Assert(cbRead == cbTaskRead); + Assert(cSegments == 1); + rc = pVDIo->pInterfaceIo->pfnReadSync(pVDIo->pInterfaceIo->Core.pvUser, + pIoStorage->pStorage, uOffset, + Seg.pvSeg, cbRead, NULL); + if (RT_SUCCESS(rc)) + { + Assert(cbRead == (uint32_t)cbRead); + ASMAtomicSubU32(&pIoCtx->Req.Io.cbTransferLeft, (uint32_t)cbRead); + } + } + else + { + /* Build the S/G array and spawn a new I/O task */ + while (cbRead) + { + RTSGSEG aSeg[VD_IO_TASK_SEGMENTS_MAX]; + unsigned cSegments = VD_IO_TASK_SEGMENTS_MAX; + size_t cbTaskRead = RTSgBufSegArrayCreate(&pIoCtx->Req.Io.SgBuf, aSeg, &cSegments, cbRead); - Assert(cSegments > 0); - Assert(cbTaskRead > 0); - AssertMsg(cbTaskRead <= cbRead, ("Invalid number of bytes to read\n")); + Assert(cSegments > 0); + Assert(cbTaskRead > 0); + AssertMsg(cbTaskRead <= cbRead, ("Invalid number of bytes to read\n")); - LogFlow(("Reading %u bytes into %u segments\n", cbTaskRead, cSegments)); + LogFlow(("Reading %u bytes into %u segments\n", cbTaskRead, cSegments)); #ifdef RT_STRICT - for (unsigned i = 0; i < cSegments; i++) - AssertMsg(aSeg[i].pvSeg && !(aSeg[i].cbSeg % 512), - ("Segment %u is invalid\n", i)); + for (unsigned i = 0; i < cSegments; i++) + AssertMsg(aSeg[i].pvSeg && 
!(aSeg[i].cbSeg % 512), + ("Segment %u is invalid\n", i)); #endif - PVDIOTASK pIoTask = vdIoTaskUserAlloc(pIoStorage, NULL, NULL, pIoCtx, cbTaskRead); + Assert(cbTaskRead == (uint32_t)cbTaskRead); + PVDIOTASK pIoTask = vdIoTaskUserAlloc(pIoStorage, NULL, NULL, pIoCtx, (uint32_t)cbTaskRead); - if (!pIoTask) - return VERR_NO_MEMORY; + if (!pIoTask) + return VERR_NO_MEMORY; - ASMAtomicIncU32(&pIoCtx->cDataTransfersPending); + ASMAtomicIncU32(&pIoCtx->cDataTransfersPending); - void *pvTask; - rc = pVDIo->pInterfaceIo->pfnReadAsync(pVDIo->pInterfaceIo->Core.pvUser, - pIoStorage->pStorage, uOffset, - aSeg, cSegments, cbTaskRead, pIoTask, - &pvTask); - if (RT_SUCCESS(rc)) - { - AssertMsg(cbTaskRead <= pIoCtx->Req.Io.cbTransferLeft, ("Impossible!\n")); - ASMAtomicSubU32(&pIoCtx->Req.Io.cbTransferLeft, cbTaskRead); - ASMAtomicDecU32(&pIoCtx->cDataTransfersPending); - vdIoTaskFree(pDisk, pIoTask); - } - else if (rc != VERR_VD_ASYNC_IO_IN_PROGRESS) - { - ASMAtomicDecU32(&pIoCtx->cDataTransfersPending); - vdIoTaskFree(pDisk, pIoTask); - break; - } + void *pvTask; + Log(("Spawning pIoTask=%p pIoCtx=%p\n", pIoTask, pIoCtx)); + rc = pVDIo->pInterfaceIo->pfnReadAsync(pVDIo->pInterfaceIo->Core.pvUser, + pIoStorage->pStorage, uOffset, + aSeg, cSegments, cbTaskRead, pIoTask, + &pvTask); + if (RT_SUCCESS(rc)) + { + AssertMsg(cbTaskRead <= pIoCtx->Req.Io.cbTransferLeft, ("Impossible!\n")); + ASMAtomicSubU32(&pIoCtx->Req.Io.cbTransferLeft, (uint32_t)cbTaskRead); + ASMAtomicDecU32(&pIoCtx->cDataTransfersPending); + vdIoTaskFree(pDisk, pIoTask); + } + else if (rc != VERR_VD_ASYNC_IO_IN_PROGRESS) + { + ASMAtomicDecU32(&pIoCtx->cDataTransfersPending); + vdIoTaskFree(pDisk, pIoTask); + break; + } - uOffset += cbTaskRead; - cbRead -= cbTaskRead; + uOffset += cbTaskRead; + cbRead -= cbTaskRead; + } } LogFlowFunc(("returns rc=%Rrc\n", rc)); return rc; } -static int vdIOIntWriteUserAsync(void *pvUser, PVDIOSTORAGE pIoStorage, - uint64_t uOffset, PVDIOCTX pIoCtx, - size_t cbWrite, - 
PFNVDXFERCOMPLETED pfnComplete, - void *pvCompleteUser) +static int vdIOIntWriteUser(void *pvUser, PVDIOSTORAGE pIoStorage, uint64_t uOffset, + PVDIOCTX pIoCtx, size_t cbWrite, PFNVDXFERCOMPLETED pfnComplete, + void *pvCompleteUser) { int rc = VINF_SUCCESS; PVDIO pVDIo = (PVDIO)pvUser; @@ -4242,70 +4127,99 @@ static int vdIOIntWriteUserAsync(void *pvUser, PVDIOSTORAGE pIoStorage, LogFlowFunc(("pvUser=%#p pIoStorage=%#p uOffset=%llu pIoCtx=%#p cbWrite=%u\n", pvUser, pIoStorage, uOffset, pIoCtx, cbWrite)); - VD_THREAD_IS_CRITSECT_OWNER(pDisk); + /** @todo: Enable check for sync I/O later. */ + if (!(pIoCtx->fFlags & VDIOCTX_FLAGS_SYNC)) + VD_IS_LOCKED(pDisk); Assert(cbWrite > 0); - /* Build the S/G array and spawn a new I/O task */ - while (cbWrite) + if (pIoCtx->fFlags & VDIOCTX_FLAGS_SYNC) { - RTSGSEG aSeg[VD_IO_TASK_SEGMENTS_MAX]; - unsigned cSegments = VD_IO_TASK_SEGMENTS_MAX; - size_t cbTaskWrite = 0; + RTSGSEG Seg; + unsigned cSegments = 1; + size_t cbTaskWrite = 0; - cbTaskWrite = RTSgBufSegArrayCreate(&pIoCtx->Req.Io.SgBuf, aSeg, &cSegments, cbWrite); + /* Synchronous I/O contexts only have one buffer segment. 
*/ + AssertMsgReturn(pIoCtx->Req.Io.SgBuf.cSegs == 1, + ("Invalid number of buffer segments for synchronous I/O context"), + VERR_INVALID_PARAMETER); - Assert(cSegments > 0); - Assert(cbTaskWrite > 0); - AssertMsg(cbTaskWrite <= cbWrite, ("Invalid number of bytes to write\n")); + cbTaskWrite = RTSgBufSegArrayCreate(&pIoCtx->Req.Io.SgBuf, &Seg, &cSegments, cbWrite); + Assert(cbWrite == cbTaskWrite); + Assert(cSegments == 1); + rc = pVDIo->pInterfaceIo->pfnWriteSync(pVDIo->pInterfaceIo->Core.pvUser, + pIoStorage->pStorage, uOffset, + Seg.pvSeg, cbWrite, NULL); + if (RT_SUCCESS(rc)) + { + Assert(pIoCtx->Req.Io.cbTransferLeft >= cbWrite); + ASMAtomicSubU32(&pIoCtx->Req.Io.cbTransferLeft, (uint32_t)cbWrite); + } + } + else + { + /* Build the S/G array and spawn a new I/O task */ + while (cbWrite) + { + RTSGSEG aSeg[VD_IO_TASK_SEGMENTS_MAX]; + unsigned cSegments = VD_IO_TASK_SEGMENTS_MAX; + size_t cbTaskWrite = 0; - LogFlow(("Writing %u bytes from %u segments\n", cbTaskWrite, cSegments)); + cbTaskWrite = RTSgBufSegArrayCreate(&pIoCtx->Req.Io.SgBuf, aSeg, &cSegments, cbWrite); + + Assert(cSegments > 0); + Assert(cbTaskWrite > 0); + AssertMsg(cbTaskWrite <= cbWrite, ("Invalid number of bytes to write\n")); + + LogFlow(("Writing %u bytes from %u segments\n", cbTaskWrite, cSegments)); #ifdef DEBUG - for (unsigned i = 0; i < cSegments; i++) - AssertMsg(aSeg[i].pvSeg && !(aSeg[i].cbSeg % 512), - ("Segment %u is invalid\n", i)); + for (unsigned i = 0; i < cSegments; i++) + AssertMsg(aSeg[i].pvSeg && !(aSeg[i].cbSeg % 512), + ("Segment %u is invalid\n", i)); #endif - PVDIOTASK pIoTask = vdIoTaskUserAlloc(pIoStorage, pfnComplete, pvCompleteUser, pIoCtx, cbTaskWrite); + Assert(cbTaskWrite == (uint32_t)cbTaskWrite); + PVDIOTASK pIoTask = vdIoTaskUserAlloc(pIoStorage, pfnComplete, pvCompleteUser, pIoCtx, (uint32_t)cbTaskWrite); - if (!pIoTask) - return VERR_NO_MEMORY; + if (!pIoTask) + return VERR_NO_MEMORY; - ASMAtomicIncU32(&pIoCtx->cDataTransfersPending); + 
ASMAtomicIncU32(&pIoCtx->cDataTransfersPending); - void *pvTask; - rc = pVDIo->pInterfaceIo->pfnWriteAsync(pVDIo->pInterfaceIo->Core.pvUser, - pIoStorage->pStorage, - uOffset, aSeg, cSegments, - cbTaskWrite, pIoTask, &pvTask); - if (RT_SUCCESS(rc)) - { - AssertMsg(cbTaskWrite <= pIoCtx->Req.Io.cbTransferLeft, ("Impossible!\n")); - ASMAtomicSubU32(&pIoCtx->Req.Io.cbTransferLeft, cbTaskWrite); - ASMAtomicDecU32(&pIoCtx->cDataTransfersPending); - vdIoTaskFree(pDisk, pIoTask); - } - else if (rc != VERR_VD_ASYNC_IO_IN_PROGRESS) - { - ASMAtomicDecU32(&pIoCtx->cDataTransfersPending); - vdIoTaskFree(pDisk, pIoTask); - break; - } + void *pvTask; + Log(("Spawning pIoTask=%p pIoCtx=%p\n", pIoTask, pIoCtx)); + rc = pVDIo->pInterfaceIo->pfnWriteAsync(pVDIo->pInterfaceIo->Core.pvUser, + pIoStorage->pStorage, + uOffset, aSeg, cSegments, + cbTaskWrite, pIoTask, &pvTask); + if (RT_SUCCESS(rc)) + { + AssertMsg(cbTaskWrite <= pIoCtx->Req.Io.cbTransferLeft, ("Impossible!\n")); + ASMAtomicSubU32(&pIoCtx->Req.Io.cbTransferLeft, (uint32_t)cbTaskWrite); + ASMAtomicDecU32(&pIoCtx->cDataTransfersPending); + vdIoTaskFree(pDisk, pIoTask); + } + else if (rc != VERR_VD_ASYNC_IO_IN_PROGRESS) + { + ASMAtomicDecU32(&pIoCtx->cDataTransfersPending); + vdIoTaskFree(pDisk, pIoTask); + break; + } - uOffset += cbTaskWrite; - cbWrite -= cbTaskWrite; + uOffset += cbTaskWrite; + cbWrite -= cbTaskWrite; + } } + LogFlowFunc(("returns rc=%Rrc\n", rc)); return rc; } -static int vdIOIntReadMetaAsync(void *pvUser, PVDIOSTORAGE pIoStorage, - uint64_t uOffset, void *pvBuf, - size_t cbRead, PVDIOCTX pIoCtx, - PPVDMETAXFER ppMetaXfer, - PFNVDXFERCOMPLETED pfnComplete, - void *pvCompleteUser) +static int vdIOIntReadMeta(void *pvUser, PVDIOSTORAGE pIoStorage, uint64_t uOffset, + void *pvBuf, size_t cbRead, PVDIOCTX pIoCtx, + PPVDMETAXFER ppMetaXfer, PFNVDXFERCOMPLETED pfnComplete, + void *pvCompleteUser) { PVDIO pVDIo = (PVDIO)pvUser; PVBOXHDD pDisk = pVDIo->pDisk; @@ -4318,91 +4232,112 @@ static int 
vdIOIntReadMetaAsync(void *pvUser, PVDIOSTORAGE pIoStorage, LogFlowFunc(("pvUser=%#p pIoStorage=%#p uOffset=%llu pvBuf=%#p cbRead=%u\n", pvUser, pIoStorage, uOffset, pvBuf, cbRead)); - VD_THREAD_IS_CRITSECT_OWNER(pDisk); + AssertMsgReturn( pIoCtx + || (!ppMetaXfer && !pfnComplete && !pvCompleteUser), + ("A synchronous metadata read is requested but the parameters are wrong\n"), + VERR_INVALID_POINTER); - pMetaXfer = (PVDMETAXFER)RTAvlrFileOffsetGet(pIoStorage->pTreeMetaXfers, uOffset); - if (!pMetaXfer) + /** @todo: Enable check for sync I/O later. */ + if ( pIoCtx + && !(pIoCtx->fFlags & VDIOCTX_FLAGS_SYNC)) + VD_IS_LOCKED(pDisk); + + if ( !pIoCtx + || pIoCtx->fFlags & VDIOCTX_FLAGS_SYNC) { + /* Handle synchronous metadata I/O. */ + /** @todo: Integrate with metadata transfers below. */ + rc = pVDIo->pInterfaceIo->pfnReadSync(pVDIo->pInterfaceIo->Core.pvUser, + pIoStorage->pStorage, uOffset, + pvBuf, cbRead, NULL); + if (ppMetaXfer) + *ppMetaXfer = NULL; + } + else + { + pMetaXfer = (PVDMETAXFER)RTAvlrFileOffsetGet(pIoStorage->pTreeMetaXfers, uOffset); + if (!pMetaXfer) + { #ifdef RT_STRICT - pMetaXfer = (PVDMETAXFER)RTAvlrFileOffsetGetBestFit(pIoStorage->pTreeMetaXfers, uOffset, false /* fAbove */); - AssertMsg(!pMetaXfer || (pMetaXfer->Core.Key + (RTFOFF)pMetaXfer->cbMeta <= (RTFOFF)uOffset), - ("Overlapping meta transfers!\n")); + pMetaXfer = (PVDMETAXFER)RTAvlrFileOffsetGetBestFit(pIoStorage->pTreeMetaXfers, uOffset, false /* fAbove */); + AssertMsg(!pMetaXfer || (pMetaXfer->Core.Key + (RTFOFF)pMetaXfer->cbMeta <= (RTFOFF)uOffset), + ("Overlapping meta transfers!\n")); #endif - /* Allocate a new meta transfer. */ - pMetaXfer = vdMetaXferAlloc(pIoStorage, uOffset, cbRead); - if (!pMetaXfer) - return VERR_NO_MEMORY; + /* Allocate a new meta transfer. 
*/ + pMetaXfer = vdMetaXferAlloc(pIoStorage, uOffset, cbRead); + if (!pMetaXfer) + return VERR_NO_MEMORY; - pIoTask = vdIoTaskMetaAlloc(pIoStorage, pfnComplete, pvCompleteUser, pMetaXfer); - if (!pIoTask) - { - RTMemFree(pMetaXfer); - return VERR_NO_MEMORY; - } + pIoTask = vdIoTaskMetaAlloc(pIoStorage, pfnComplete, pvCompleteUser, pMetaXfer); + if (!pIoTask) + { + RTMemFree(pMetaXfer); + return VERR_NO_MEMORY; + } - Seg.cbSeg = cbRead; - Seg.pvSeg = pMetaXfer->abData; + Seg.cbSeg = cbRead; + Seg.pvSeg = pMetaXfer->abData; - VDMETAXFER_TXDIR_SET(pMetaXfer->fFlags, VDMETAXFER_TXDIR_READ); - rc = pVDIo->pInterfaceIo->pfnReadAsync(pVDIo->pInterfaceIo->Core.pvUser, - pIoStorage->pStorage, - uOffset, &Seg, 1, - cbRead, pIoTask, &pvTask); + VDMETAXFER_TXDIR_SET(pMetaXfer->fFlags, VDMETAXFER_TXDIR_READ); + rc = pVDIo->pInterfaceIo->pfnReadAsync(pVDIo->pInterfaceIo->Core.pvUser, + pIoStorage->pStorage, + uOffset, &Seg, 1, + cbRead, pIoTask, &pvTask); - if (RT_SUCCESS(rc) || rc == VERR_VD_ASYNC_IO_IN_PROGRESS) - { - bool fInserted = RTAvlrFileOffsetInsert(pIoStorage->pTreeMetaXfers, &pMetaXfer->Core); - Assert(fInserted); - } - else - RTMemFree(pMetaXfer); + if (RT_SUCCESS(rc) || rc == VERR_VD_ASYNC_IO_IN_PROGRESS) + { + bool fInserted = RTAvlrFileOffsetInsert(pIoStorage->pTreeMetaXfers, &pMetaXfer->Core); + Assert(fInserted); + } + else + RTMemFree(pMetaXfer); - if (RT_SUCCESS(rc)) - { - VDMETAXFER_TXDIR_SET(pMetaXfer->fFlags, VDMETAXFER_TXDIR_NONE); - vdIoTaskFree(pDisk, pIoTask); + if (RT_SUCCESS(rc)) + { + VDMETAXFER_TXDIR_SET(pMetaXfer->fFlags, VDMETAXFER_TXDIR_NONE); + vdIoTaskFree(pDisk, pIoTask); + } + else if (rc == VERR_VD_ASYNC_IO_IN_PROGRESS && !pfnComplete) + rc = VERR_VD_NOT_ENOUGH_METADATA; } - else if (rc == VERR_VD_ASYNC_IO_IN_PROGRESS && !pfnComplete) - rc = VERR_VD_NOT_ENOUGH_METADATA; - } - Assert(VALID_PTR(pMetaXfer) || RT_FAILURE(rc)); + Assert(VALID_PTR(pMetaXfer) || RT_FAILURE(rc)); - if (RT_SUCCESS(rc) || rc == VERR_VD_NOT_ENOUGH_METADATA || rc == 
VERR_VD_ASYNC_IO_IN_PROGRESS) - { - /* If it is pending add the request to the list. */ - if (VDMETAXFER_TXDIR_GET(pMetaXfer->fFlags) == VDMETAXFER_TXDIR_READ) + if (RT_SUCCESS(rc) || rc == VERR_VD_NOT_ENOUGH_METADATA || rc == VERR_VD_ASYNC_IO_IN_PROGRESS) { - PVDIOCTXDEFERRED pDeferred = (PVDIOCTXDEFERRED)RTMemAllocZ(sizeof(VDIOCTXDEFERRED)); - AssertPtr(pDeferred); + /* If it is pending add the request to the list. */ + if (VDMETAXFER_TXDIR_GET(pMetaXfer->fFlags) == VDMETAXFER_TXDIR_READ) + { + PVDIOCTXDEFERRED pDeferred = (PVDIOCTXDEFERRED)RTMemAllocZ(sizeof(VDIOCTXDEFERRED)); + AssertPtr(pDeferred); - RTListInit(&pDeferred->NodeDeferred); - pDeferred->pIoCtx = pIoCtx; + RTListInit(&pDeferred->NodeDeferred); + pDeferred->pIoCtx = pIoCtx; - ASMAtomicIncU32(&pIoCtx->cMetaTransfersPending); - RTListAppend(&pMetaXfer->ListIoCtxWaiting, &pDeferred->NodeDeferred); - rc = VERR_VD_NOT_ENOUGH_METADATA; - } - else - { - /* Transfer the data. */ - pMetaXfer->cRefs++; - Assert(pMetaXfer->cbMeta >= cbRead); - Assert(pMetaXfer->Core.Key == (RTFOFF)uOffset); - memcpy(pvBuf, pMetaXfer->abData, cbRead); - *ppMetaXfer = pMetaXfer; + ASMAtomicIncU32(&pIoCtx->cMetaTransfersPending); + RTListAppend(&pMetaXfer->ListIoCtxWaiting, &pDeferred->NodeDeferred); + rc = VERR_VD_NOT_ENOUGH_METADATA; + } + else + { + /* Transfer the data. 
*/ + pMetaXfer->cRefs++; + Assert(pMetaXfer->cbMeta >= cbRead); + Assert(pMetaXfer->Core.Key == (RTFOFF)uOffset); + memcpy(pvBuf, pMetaXfer->abData, cbRead); + *ppMetaXfer = pMetaXfer; + } } } + LogFlowFunc(("returns rc=%Rrc\n", rc)); return rc; } -static int vdIOIntWriteMetaAsync(void *pvUser, PVDIOSTORAGE pIoStorage, - uint64_t uOffset, void *pvBuf, - size_t cbWrite, PVDIOCTX pIoCtx, - PFNVDXFERCOMPLETED pfnComplete, - void *pvCompleteUser) +static int vdIOIntWriteMeta(void *pvUser, PVDIOSTORAGE pIoStorage, uint64_t uOffset, + const void *pvBuf, size_t cbWrite, PVDIOCTX pIoCtx, + PFNVDXFERCOMPLETED pfnComplete, void *pvCompleteUser) { PVDIO pVDIo = (PVDIO)pvUser; PVBOXHDD pDisk = pVDIo->pDisk; @@ -4416,79 +4351,100 @@ static int vdIOIntWriteMetaAsync(void *pvUser, PVDIOSTORAGE pIoStorage, LogFlowFunc(("pvUser=%#p pIoStorage=%#p uOffset=%llu pvBuf=%#p cbWrite=%u\n", pvUser, pIoStorage, uOffset, pvBuf, cbWrite)); - VD_THREAD_IS_CRITSECT_OWNER(pDisk); + AssertMsgReturn( pIoCtx + || (!pfnComplete && !pvCompleteUser), + ("A synchronous metadata write is requested but the parameters are wrong\n"), + VERR_INVALID_POINTER); - pMetaXfer = (PVDMETAXFER)RTAvlrFileOffsetGet(pIoStorage->pTreeMetaXfers, uOffset); - if (!pMetaXfer) + /** @todo: Enable check for sync I/O later. */ + if ( pIoCtx + && !(pIoCtx->fFlags & VDIOCTX_FLAGS_SYNC)) + VD_IS_LOCKED(pDisk); + + if ( !pIoCtx + || pIoCtx->fFlags & VDIOCTX_FLAGS_SYNC) { - /* Allocate a new meta transfer. */ - pMetaXfer = vdMetaXferAlloc(pIoStorage, uOffset, cbWrite); - if (!pMetaXfer) - return VERR_NO_MEMORY; + /* Handle synchronous metadata I/O. */ + /** @todo: Integrate with metadata transfers below. 
*/ + rc = pVDIo->pInterfaceIo->pfnWriteSync(pVDIo->pInterfaceIo->Core.pvUser, + pIoStorage->pStorage, uOffset, + pvBuf, cbWrite, NULL); } else { - Assert(pMetaXfer->cbMeta >= cbWrite); - Assert(pMetaXfer->Core.Key == (RTFOFF)uOffset); - fInTree = true; - } + pMetaXfer = (PVDMETAXFER)RTAvlrFileOffsetGet(pIoStorage->pTreeMetaXfers, uOffset); + if (!pMetaXfer) + { + /* Allocate a new meta transfer. */ + pMetaXfer = vdMetaXferAlloc(pIoStorage, uOffset, cbWrite); + if (!pMetaXfer) + return VERR_NO_MEMORY; + } + else + { + Assert(pMetaXfer->cbMeta >= cbWrite); + Assert(pMetaXfer->Core.Key == (RTFOFF)uOffset); + fInTree = true; + } - Assert(VDMETAXFER_TXDIR_GET(pMetaXfer->fFlags) == VDMETAXFER_TXDIR_NONE); + Assert(VDMETAXFER_TXDIR_GET(pMetaXfer->fFlags) == VDMETAXFER_TXDIR_NONE); - pIoTask = vdIoTaskMetaAlloc(pIoStorage, pfnComplete, pvCompleteUser, pMetaXfer); - if (!pIoTask) - { - RTMemFree(pMetaXfer); - return VERR_NO_MEMORY; - } + pIoTask = vdIoTaskMetaAlloc(pIoStorage, pfnComplete, pvCompleteUser, pMetaXfer); + if (!pIoTask) + { + RTMemFree(pMetaXfer); + return VERR_NO_MEMORY; + } - memcpy(pMetaXfer->abData, pvBuf, cbWrite); - Seg.cbSeg = cbWrite; - Seg.pvSeg = pMetaXfer->abData; + memcpy(pMetaXfer->abData, pvBuf, cbWrite); + Seg.cbSeg = cbWrite; + Seg.pvSeg = pMetaXfer->abData; - ASMAtomicIncU32(&pIoCtx->cMetaTransfersPending); + ASMAtomicIncU32(&pIoCtx->cMetaTransfersPending); - VDMETAXFER_TXDIR_SET(pMetaXfer->fFlags, VDMETAXFER_TXDIR_WRITE); - rc = pVDIo->pInterfaceIo->pfnWriteAsync(pVDIo->pInterfaceIo->Core.pvUser, - pIoStorage->pStorage, - uOffset, &Seg, 1, cbWrite, pIoTask, - &pvTask); - if (RT_SUCCESS(rc)) - { - VDMETAXFER_TXDIR_SET(pMetaXfer->fFlags, VDMETAXFER_TXDIR_NONE); - ASMAtomicDecU32(&pIoCtx->cMetaTransfersPending); - vdIoTaskFree(pDisk, pIoTask); - if (fInTree && !pMetaXfer->cRefs) + VDMETAXFER_TXDIR_SET(pMetaXfer->fFlags, VDMETAXFER_TXDIR_WRITE); + rc = pVDIo->pInterfaceIo->pfnWriteAsync(pVDIo->pInterfaceIo->Core.pvUser, + pIoStorage->pStorage, + 
uOffset, &Seg, 1, cbWrite, pIoTask, + &pvTask); + if (RT_SUCCESS(rc)) { - LogFlow(("Removing meta xfer=%#p\n", pMetaXfer)); - bool fRemoved = RTAvlrFileOffsetRemove(pIoStorage->pTreeMetaXfers, pMetaXfer->Core.Key) != NULL; - AssertMsg(fRemoved, ("Metadata transfer wasn't removed\n")); - RTMemFree(pMetaXfer); - pMetaXfer = NULL; + VDMETAXFER_TXDIR_SET(pMetaXfer->fFlags, VDMETAXFER_TXDIR_NONE); + ASMAtomicDecU32(&pIoCtx->cMetaTransfersPending); + vdIoTaskFree(pDisk, pIoTask); + if (fInTree && !pMetaXfer->cRefs) + { + LogFlow(("Removing meta xfer=%#p\n", pMetaXfer)); + bool fRemoved = RTAvlrFileOffsetRemove(pIoStorage->pTreeMetaXfers, pMetaXfer->Core.Key) != NULL; + AssertMsg(fRemoved, ("Metadata transfer wasn't removed\n")); + RTMemFree(pMetaXfer); + pMetaXfer = NULL; + } } - } - else if (rc == VERR_VD_ASYNC_IO_IN_PROGRESS) - { - PVDIOCTXDEFERRED pDeferred = (PVDIOCTXDEFERRED)RTMemAllocZ(sizeof(VDIOCTXDEFERRED)); - AssertPtr(pDeferred); + else if (rc == VERR_VD_ASYNC_IO_IN_PROGRESS) + { + PVDIOCTXDEFERRED pDeferred = (PVDIOCTXDEFERRED)RTMemAllocZ(sizeof(VDIOCTXDEFERRED)); + AssertPtr(pDeferred); - RTListInit(&pDeferred->NodeDeferred); - pDeferred->pIoCtx = pIoCtx; + RTListInit(&pDeferred->NodeDeferred); + pDeferred->pIoCtx = pIoCtx; + + if (!fInTree) + { + bool fInserted = RTAvlrFileOffsetInsert(pIoStorage->pTreeMetaXfers, &pMetaXfer->Core); + Assert(fInserted); + } - if (!fInTree) + RTListAppend(&pMetaXfer->ListIoCtxWaiting, &pDeferred->NodeDeferred); + } + else { - bool fInserted = RTAvlrFileOffsetInsert(pIoStorage->pTreeMetaXfers, &pMetaXfer->Core); - Assert(fInserted); + RTMemFree(pMetaXfer); + pMetaXfer = NULL; } - - RTListAppend(&pMetaXfer->ListIoCtxWaiting, &pDeferred->NodeDeferred); - } - else - { - RTMemFree(pMetaXfer); - pMetaXfer = NULL; } + LogFlowFunc(("returns rc=%Rrc\n", rc)); return rc; } @@ -4496,9 +4452,18 @@ static void vdIOIntMetaXferRelease(void *pvUser, PVDMETAXFER pMetaXfer) { PVDIO pVDIo = (PVDIO)pvUser; PVBOXHDD pDisk = pVDIo->pDisk; - 
PVDIOSTORAGE pIoStorage = pMetaXfer->pIoStorage; + PVDIOSTORAGE pIoStorage; - VD_THREAD_IS_CRITSECT_OWNER(pDisk); + /* + * It is possible that we get called with a NULL metadata xfer handle + * for synchronous I/O. Just exit. + */ + if (!pMetaXfer) + return; + + pIoStorage = pMetaXfer->pIoStorage; + + VD_IS_LOCKED(pDisk); Assert( VDMETAXFER_TXDIR_GET(pMetaXfer->fFlags) == VDMETAXFER_TXDIR_NONE || VDMETAXFER_TXDIR_GET(pMetaXfer->fFlags) == VDMETAXFER_TXDIR_WRITE); @@ -4518,9 +4483,8 @@ static void vdIOIntMetaXferRelease(void *pvUser, PVDMETAXFER pMetaXfer) } } -static int vdIOIntFlushAsync(void *pvUser, PVDIOSTORAGE pIoStorage, - PVDIOCTX pIoCtx, PFNVDXFERCOMPLETED pfnComplete, - void *pvCompleteUser) +static int vdIOIntFlush(void *pvUser, PVDIOSTORAGE pIoStorage, PVDIOCTX pIoCtx, + PFNVDXFERCOMPLETED pfnComplete, void *pvCompleteUser) { PVDIO pVDIo = (PVDIO)pvUser; PVBOXHDD pDisk = pVDIo->pDisk; @@ -4529,66 +4493,89 @@ static int vdIOIntFlushAsync(void *pvUser, PVDIOSTORAGE pIoStorage, PVDMETAXFER pMetaXfer = NULL; void *pvTask = NULL; - VD_THREAD_IS_CRITSECT_OWNER(pDisk); - LogFlowFunc(("pvUser=%#p pIoStorage=%#p pIoCtx=%#p\n", pvUser, pIoStorage, pIoCtx)); + AssertMsgReturn( pIoCtx + || (!pfnComplete && !pvCompleteUser), + ("A synchronous metadata write is requested but the parameters are wrong\n"), + VERR_INVALID_POINTER); + + /** @todo: Enable check for sync I/O later. */ + if ( pIoCtx + && !(pIoCtx->fFlags & VDIOCTX_FLAGS_SYNC)) + VD_IS_LOCKED(pDisk); + if (pVDIo->fIgnoreFlush) return VINF_SUCCESS; - /* Allocate a new meta transfer. */ - pMetaXfer = vdMetaXferAlloc(pIoStorage, 0, 0); - if (!pMetaXfer) - return VERR_NO_MEMORY; - - pIoTask = vdIoTaskMetaAlloc(pIoStorage, pfnComplete, pvUser, pMetaXfer); - if (!pIoTask) + if ( !pIoCtx + || pIoCtx->fFlags & VDIOCTX_FLAGS_SYNC) { - RTMemFree(pMetaXfer); - return VERR_NO_MEMORY; + /* Handle synchronous flushes. */ + /** @todo: Integrate with metadata transfers below. 
*/ + rc = pVDIo->pInterfaceIo->pfnFlushSync(pVDIo->pInterfaceIo->Core.pvUser, + pIoStorage->pStorage); } + else + { + /* Allocate a new meta transfer. */ + pMetaXfer = vdMetaXferAlloc(pIoStorage, 0, 0); + if (!pMetaXfer) + return VERR_NO_MEMORY; - ASMAtomicIncU32(&pIoCtx->cMetaTransfersPending); + pIoTask = vdIoTaskMetaAlloc(pIoStorage, pfnComplete, pvUser, pMetaXfer); + if (!pIoTask) + { + RTMemFree(pMetaXfer); + return VERR_NO_MEMORY; + } - PVDIOCTXDEFERRED pDeferred = (PVDIOCTXDEFERRED)RTMemAllocZ(sizeof(VDIOCTXDEFERRED)); - AssertPtr(pDeferred); + ASMAtomicIncU32(&pIoCtx->cMetaTransfersPending); - RTListInit(&pDeferred->NodeDeferred); - pDeferred->pIoCtx = pIoCtx; + PVDIOCTXDEFERRED pDeferred = (PVDIOCTXDEFERRED)RTMemAllocZ(sizeof(VDIOCTXDEFERRED)); + AssertPtr(pDeferred); - RTListAppend(&pMetaXfer->ListIoCtxWaiting, &pDeferred->NodeDeferred); - VDMETAXFER_TXDIR_SET(pMetaXfer->fFlags, VDMETAXFER_TXDIR_FLUSH); - rc = pVDIo->pInterfaceIo->pfnFlushAsync(pVDIo->pInterfaceIo->Core.pvUser, - pIoStorage->pStorage, - pIoTask, &pvTask); - if (RT_SUCCESS(rc)) - { - VDMETAXFER_TXDIR_SET(pMetaXfer->fFlags, VDMETAXFER_TXDIR_NONE); - ASMAtomicDecU32(&pIoCtx->cMetaTransfersPending); - vdIoTaskFree(pDisk, pIoTask); - RTMemFree(pDeferred); - RTMemFree(pMetaXfer); + RTListInit(&pDeferred->NodeDeferred); + pDeferred->pIoCtx = pIoCtx; + + RTListAppend(&pMetaXfer->ListIoCtxWaiting, &pDeferred->NodeDeferred); + VDMETAXFER_TXDIR_SET(pMetaXfer->fFlags, VDMETAXFER_TXDIR_FLUSH); + rc = pVDIo->pInterfaceIo->pfnFlushAsync(pVDIo->pInterfaceIo->Core.pvUser, + pIoStorage->pStorage, + pIoTask, &pvTask); + if (RT_SUCCESS(rc)) + { + VDMETAXFER_TXDIR_SET(pMetaXfer->fFlags, VDMETAXFER_TXDIR_NONE); + ASMAtomicDecU32(&pIoCtx->cMetaTransfersPending); + vdIoTaskFree(pDisk, pIoTask); + RTMemFree(pDeferred); + RTMemFree(pMetaXfer); + } + else if (rc != VERR_VD_ASYNC_IO_IN_PROGRESS) + RTMemFree(pMetaXfer); } - else if (rc != VERR_VD_ASYNC_IO_IN_PROGRESS) - RTMemFree(pMetaXfer); + LogFlowFunc(("returns 
rc=%Rrc\n", rc)); return rc; } static size_t vdIOIntIoCtxCopyTo(void *pvUser, PVDIOCTX pIoCtx, - void *pvBuf, size_t cbBuf) + const void *pvBuf, size_t cbBuf) { PVDIO pVDIo = (PVDIO)pvUser; PVBOXHDD pDisk = pVDIo->pDisk; size_t cbCopied = 0; - VD_THREAD_IS_CRITSECT_OWNER(pDisk); + /** @todo: Enable check for sync I/O later. */ + if (!(pIoCtx->fFlags & VDIOCTX_FLAGS_SYNC)) + VD_IS_LOCKED(pDisk); cbCopied = vdIoCtxCopyTo(pIoCtx, (uint8_t *)pvBuf, cbBuf); Assert(cbCopied == cbBuf); - ASMAtomicSubU32(&pIoCtx->Req.Io.cbTransferLeft, cbCopied); + /// @todo Assert(pIoCtx->Req.Io.cbTransferLeft >= cbCopied); - triggers with vdCopyHelper/dmgRead. + ASMAtomicSubU32(&pIoCtx->Req.Io.cbTransferLeft, (uint32_t)cbCopied); return cbCopied; } @@ -4600,12 +4587,15 @@ static size_t vdIOIntIoCtxCopyFrom(void *pvUser, PVDIOCTX pIoCtx, PVBOXHDD pDisk = pVDIo->pDisk; size_t cbCopied = 0; - VD_THREAD_IS_CRITSECT_OWNER(pDisk); + /** @todo: Enable check for sync I/O later. */ + if (!(pIoCtx->fFlags & VDIOCTX_FLAGS_SYNC)) + VD_IS_LOCKED(pDisk); cbCopied = vdIoCtxCopyFrom(pIoCtx, (uint8_t *)pvBuf, cbBuf); Assert(cbCopied == cbBuf); - ASMAtomicSubU32(&pIoCtx->Req.Io.cbTransferLeft, cbCopied); + /// @todo Assert(pIoCtx->Req.Io.cbTransferLeft > cbCopied); - triggers with vdCopyHelper/dmgRead. + ASMAtomicSubU32(&pIoCtx->Req.Io.cbTransferLeft, (uint32_t)cbCopied); return cbCopied; } @@ -4616,12 +4606,15 @@ static size_t vdIOIntIoCtxSet(void *pvUser, PVDIOCTX pIoCtx, int ch, size_t cb) PVBOXHDD pDisk = pVDIo->pDisk; size_t cbSet = 0; - VD_THREAD_IS_CRITSECT_OWNER(pDisk); + /** @todo: Enable check for sync I/O later. */ + if (!(pIoCtx->fFlags & VDIOCTX_FLAGS_SYNC)) + VD_IS_LOCKED(pDisk); cbSet = vdIoCtxSet(pIoCtx, ch, cb); Assert(cbSet == cb); - ASMAtomicSubU32(&pIoCtx->Req.Io.cbTransferLeft, cbSet); + /// @todo Assert(pIoCtx->Req.Io.cbTransferLeft >= cbSet); - triggers with vdCopyHelper/dmgRead. 
+ ASMAtomicSubU32(&pIoCtx->Req.Io.cbTransferLeft, (uint32_t)cbSet); return cbSet; } @@ -4634,7 +4627,9 @@ static size_t vdIOIntIoCtxSegArrayCreate(void *pvUser, PVDIOCTX pIoCtx, PVBOXHDD pDisk = pVDIo->pDisk; size_t cbCreated = 0; - VD_THREAD_IS_CRITSECT_OWNER(pDisk); + /** @todo: Enable check for sync I/O later. */ + if (!(pIoCtx->fFlags & VDIOCTX_FLAGS_SYNC)) + VD_IS_LOCKED(pDisk); cbCreated = RTSgBufSegArrayCreate(&pIoCtx->Req.Io.SgBuf, paSeg, pcSeg, cbData); Assert(!paSeg || cbData == cbCreated); @@ -4648,30 +4643,54 @@ static void vdIOIntIoCtxCompleted(void *pvUser, PVDIOCTX pIoCtx, int rcReq, PVDIO pVDIo = (PVDIO)pvUser; PVBOXHDD pDisk = pVDIo->pDisk; + LogFlowFunc(("pvUser=%#p pIoCtx=%#p rcReq=%Rrc cbCompleted=%zu\n", + pvUser, pIoCtx, rcReq, cbCompleted)); + /* * Grab the disk critical section to avoid races with other threads which * might still modify the I/O context. * Example is that iSCSI is doing an asynchronous write but calls us already * while the other thread is still hanging in vdWriteHelperAsync and couldn't update - * the fBlocked state yet. + * the blocked state yet. * It can overwrite the state to true before we call vdIoCtxContinue and the * the request would hang indefinite. */ - int rc = RTCritSectEnter(&pDisk->CritSect); - AssertRC(rc); + ASMAtomicCmpXchgS32(&pIoCtx->rcReq, rcReq, VINF_SUCCESS); + Assert(pIoCtx->Req.Io.cbTransferLeft >= cbCompleted); + ASMAtomicSubU32(&pIoCtx->Req.Io.cbTransferLeft, (uint32_t)cbCompleted); - /* Continue */ - pIoCtx->fBlocked = false; - ASMAtomicSubU32(&pIoCtx->Req.Io.cbTransferLeft, cbCompleted); - - /* Clear the pointer to next transfer function in case we have nothing to transfer anymore. - * @todo: Find a better way to prevent vdIoCtxContinue from calling the read/write helper again. */ + /* Set next transfer function if the current one finished. + * @todo: Find a better way to prevent vdIoCtxContinue from calling the current helper again. 
*/ if (!pIoCtx->Req.Io.cbTransferLeft) - pIoCtx->pfnIoCtxTransfer = NULL; + { + pIoCtx->pfnIoCtxTransfer = pIoCtx->pfnIoCtxTransferNext; + pIoCtx->pfnIoCtxTransferNext = NULL; + } - vdIoCtxContinue(pIoCtx, rcReq); + vdIoCtxAddToWaitingList(&pDisk->pIoCtxHaltedHead, pIoCtx); + if (ASMAtomicCmpXchgBool(&pDisk->fLocked, true, false)) + { + /* Immediately drop the lock again, it will take care of processing the list. */ + vdDiskUnlock(pDisk, NULL); + } +} - vdDiskCritSectLeave(pDisk, NULL); +static DECLCALLBACK(bool) vdIOIntIoCtxIsSynchronous(void *pvUser, PVDIOCTX pIoCtx) +{ + NOREF(pvUser); + return !!(pIoCtx->fFlags & VDIOCTX_FLAGS_SYNC); +} + +static DECLCALLBACK(bool) vdIOIntIoCtxIsZero(void *pvUser, PVDIOCTX pIoCtx, size_t cbCheck, + bool fAdvance) +{ + NOREF(pvUser); + + bool fIsZero = RTSgBufIsZero(&pIoCtx->Req.Io.SgBuf, cbCheck); + if (fIsZero && fAdvance) + RTSgBufAdvance(&pIoCtx->Req.Io.SgBuf, cbCheck); + + return fIsZero; } /** @@ -4700,10 +4719,9 @@ static int vdIOIntCloseLimited(void *pvUser, PVDIOSTORAGE pIoStorage) { PVDINTERFACEIO pInterfaceIo = (PVDINTERFACEIO)pvUser; int rc = pInterfaceIo->pfnClose(NULL, pIoStorage->pStorage); - AssertRC(rc); RTMemFree(pIoStorage); - return VINF_SUCCESS; + return rc; } static int vdIOIntDeleteLimited(void *pvUser, const char *pcszFilename) @@ -4748,26 +4766,85 @@ static int vdIOIntSetSizeLimited(void *pvUser, PVDIOSTORAGE pIoStorage, return pInterfaceIo->pfnSetSize(NULL, pIoStorage->pStorage, cbSize); } -static int vdIOIntWriteSyncLimited(void *pvUser, PVDIOSTORAGE pIoStorage, - uint64_t uOffset, const void *pvBuf, - size_t cbWrite, size_t *pcbWritten) +static int vdIOIntWriteUserLimited(void *pvUser, PVDIOSTORAGE pStorage, + uint64_t uOffset, PVDIOCTX pIoCtx, + size_t cbWrite, + PFNVDXFERCOMPLETED pfnComplete, + void *pvCompleteUser) +{ + NOREF(pvUser); + NOREF(pStorage); + NOREF(uOffset); + NOREF(pIoCtx); + NOREF(cbWrite); + NOREF(pfnComplete); + NOREF(pvCompleteUser); + AssertMsgFailedReturn(("This needs to be 
implemented when called\n"), VERR_NOT_IMPLEMENTED); +} + +static int vdIOIntReadUserLimited(void *pvUser, PVDIOSTORAGE pStorage, + uint64_t uOffset, PVDIOCTX pIoCtx, + size_t cbRead) +{ + NOREF(pvUser); + NOREF(pStorage); + NOREF(uOffset); + NOREF(pIoCtx); + NOREF(cbRead); + AssertMsgFailedReturn(("This needs to be implemented when called\n"), VERR_NOT_IMPLEMENTED); +} + +static int vdIOIntWriteMetaLimited(void *pvUser, PVDIOSTORAGE pStorage, + uint64_t uOffset, const void *pvBuffer, + size_t cbBuffer, PVDIOCTX pIoCtx, + PFNVDXFERCOMPLETED pfnComplete, + void *pvCompleteUser) { PVDINTERFACEIO pInterfaceIo = (PVDINTERFACEIO)pvUser; - return pInterfaceIo->pfnWriteSync(NULL, pIoStorage->pStorage, uOffset, pvBuf, cbWrite, pcbWritten); + + AssertMsgReturn(!pIoCtx && !pfnComplete && !pvCompleteUser, + ("Async I/O not implemented for the limited interface"), + VERR_NOT_SUPPORTED); + + return pInterfaceIo->pfnWriteSync(NULL, pStorage->pStorage, uOffset, pvBuffer, cbBuffer, NULL); } -static int vdIOIntReadSyncLimited(void *pvUser, PVDIOSTORAGE pIoStorage, - uint64_t uOffset, void *pvBuf, size_t cbRead, - size_t *pcbRead) +static int vdIOIntReadMetaLimited(void *pvUser, PVDIOSTORAGE pStorage, + uint64_t uOffset, void *pvBuffer, + size_t cbBuffer, PVDIOCTX pIoCtx, + PPVDMETAXFER ppMetaXfer, + PFNVDXFERCOMPLETED pfnComplete, + void *pvCompleteUser) { PVDINTERFACEIO pInterfaceIo = (PVDINTERFACEIO)pvUser; - return pInterfaceIo->pfnReadSync(NULL, pIoStorage->pStorage, uOffset, pvBuf, cbRead, pcbRead); + + AssertMsgReturn(!pIoCtx && !ppMetaXfer && !pfnComplete && !pvCompleteUser, + ("Async I/O not implemented for the limited interface"), + VERR_NOT_SUPPORTED); + + return pInterfaceIo->pfnReadSync(NULL, pStorage->pStorage, uOffset, pvBuffer, cbBuffer, NULL); +} + +static int vdIOIntMetaXferReleaseLimited(void *pvUser, PVDMETAXFER pMetaXfer) +{ + /* This is a NOP in this case. 
*/ + NOREF(pvUser); + NOREF(pMetaXfer); + return VINF_SUCCESS; } -static int vdIOIntFlushSyncLimited(void *pvUser, PVDIOSTORAGE pIoStorage) +static int vdIOIntFlushLimited(void *pvUser, PVDIOSTORAGE pStorage, + PVDIOCTX pIoCtx, + PFNVDXFERCOMPLETED pfnComplete, + void *pvCompleteUser) { PVDINTERFACEIO pInterfaceIo = (PVDINTERFACEIO)pvUser; - return pInterfaceIo->pfnFlushSync(NULL, pIoStorage->pStorage); + + AssertMsgReturn(!pIoCtx && !pfnComplete && !pvCompleteUser, + ("Async I/O not implemented for the limited interface"), + VERR_NOT_SUPPORTED); + + return pInterfaceIo->pfnFlushSync(NULL, pStorage->pStorage); } /** @@ -4886,20 +4963,30 @@ static void vdIfIoIntCallbacksSetup(PVDINTERFACEIOINT pIfIoInt) pIfIoInt->pfnGetModificationTime = vdIOIntGetModificationTime; pIfIoInt->pfnGetSize = vdIOIntGetSize; pIfIoInt->pfnSetSize = vdIOIntSetSize; - pIfIoInt->pfnReadSync = vdIOIntReadSync; - pIfIoInt->pfnWriteSync = vdIOIntWriteSync; - pIfIoInt->pfnFlushSync = vdIOIntFlushSync; - pIfIoInt->pfnReadUserAsync = vdIOIntReadUserAsync; - pIfIoInt->pfnWriteUserAsync = vdIOIntWriteUserAsync; - pIfIoInt->pfnReadMetaAsync = vdIOIntReadMetaAsync; - pIfIoInt->pfnWriteMetaAsync = vdIOIntWriteMetaAsync; + pIfIoInt->pfnReadUser = vdIOIntReadUser; + pIfIoInt->pfnWriteUser = vdIOIntWriteUser; + pIfIoInt->pfnReadMeta = vdIOIntReadMeta; + pIfIoInt->pfnWriteMeta = vdIOIntWriteMeta; pIfIoInt->pfnMetaXferRelease = vdIOIntMetaXferRelease; - pIfIoInt->pfnFlushAsync = vdIOIntFlushAsync; + pIfIoInt->pfnFlush = vdIOIntFlush; pIfIoInt->pfnIoCtxCopyFrom = vdIOIntIoCtxCopyFrom; pIfIoInt->pfnIoCtxCopyTo = vdIOIntIoCtxCopyTo; pIfIoInt->pfnIoCtxSet = vdIOIntIoCtxSet; pIfIoInt->pfnIoCtxSegArrayCreate = vdIOIntIoCtxSegArrayCreate; pIfIoInt->pfnIoCtxCompleted = vdIOIntIoCtxCompleted; + pIfIoInt->pfnIoCtxIsSynchronous = vdIOIntIoCtxIsSynchronous; + pIfIoInt->pfnIoCtxIsZero = vdIOIntIoCtxIsZero; +} + +/** + * Internally used completion handler for synchronous I/O contexts. 
+ */ +static DECLCALLBACK(void) vdIoCtxSyncComplete(void *pvUser1, void *pvUser2, int rcReq) +{ + PVBOXHDD pDisk = (PVBOXHDD)pvUser1; + + pDisk->rcSync = rcReq; + RTSemEventSignal(pDisk->hEventSemSyncIo); } /** @@ -5077,54 +5164,43 @@ VBOXDDU_DECL(int) VDCreate(PVDINTERFACE pVDIfsDisk, VDTYPE enmType, PVBOXHDD *pp pDisk = (PVBOXHDD)RTMemAllocZ(sizeof(VBOXHDD)); if (pDisk) { - pDisk->u32Signature = VBOXHDDDISK_SIGNATURE; - pDisk->enmType = enmType; - pDisk->cImages = 0; - pDisk->pBase = NULL; - pDisk->pLast = NULL; - pDisk->cbSize = 0; + pDisk->u32Signature = VBOXHDDDISK_SIGNATURE; + pDisk->enmType = enmType; + pDisk->cImages = 0; + pDisk->pBase = NULL; + pDisk->pLast = NULL; + pDisk->cbSize = 0; pDisk->PCHSGeometry.cCylinders = 0; pDisk->PCHSGeometry.cHeads = 0; pDisk->PCHSGeometry.cSectors = 0; pDisk->LCHSGeometry.cCylinders = 0; pDisk->LCHSGeometry.cHeads = 0; pDisk->LCHSGeometry.cSectors = 0; - pDisk->pVDIfsDisk = pVDIfsDisk; - pDisk->pInterfaceError = NULL; - pDisk->pInterfaceThreadSync = NULL; - pDisk->fLocked = false; - pDisk->pIoCtxLockOwner = NULL; - pDisk->pIoCtxHead = NULL; - RTListInit(&pDisk->ListWriteLocked); + pDisk->pVDIfsDisk = pVDIfsDisk; + pDisk->pInterfaceError = NULL; + pDisk->pInterfaceThreadSync = NULL; + pDisk->pIoCtxLockOwner = NULL; + pDisk->pIoCtxHead = NULL; + pDisk->fLocked = false; + pDisk->hEventSemSyncIo = NIL_RTSEMEVENT; + pDisk->hMemCacheIoCtx = NIL_RTMEMCACHE; + pDisk->hMemCacheIoTask = NIL_RTMEMCACHE; + + rc = RTSemEventCreate(&pDisk->hEventSemSyncIo); + if (RT_FAILURE(rc)) + break; /* Create the I/O ctx cache */ rc = RTMemCacheCreate(&pDisk->hMemCacheIoCtx, sizeof(VDIOCTX), 0, UINT32_MAX, NULL, NULL, NULL, 0); if (RT_FAILURE(rc)) - { - RTMemFree(pDisk); break; - } /* Create the I/O task cache */ rc = RTMemCacheCreate(&pDisk->hMemCacheIoTask, sizeof(VDIOTASK), 0, UINT32_MAX, NULL, NULL, NULL, 0); if (RT_FAILURE(rc)) - { - RTMemCacheDestroy(pDisk->hMemCacheIoCtx); - RTMemFree(pDisk); break; - } - - /* Create critical section. 
*/ - rc = RTCritSectInit(&pDisk->CritSect); - if (RT_FAILURE(rc)) - { - RTMemCacheDestroy(pDisk->hMemCacheIoCtx); - RTMemCacheDestroy(pDisk->hMemCacheIoTask); - RTMemFree(pDisk); - break; - } pDisk->pInterfaceError = VDIfErrorGet(pVDIfsDisk); pDisk->pInterfaceThreadSync = VDIfThreadSyncGet(pVDIfsDisk); @@ -5138,6 +5214,17 @@ VBOXDDU_DECL(int) VDCreate(PVDINTERFACE pVDIfsDisk, VDTYPE enmType, PVBOXHDD *pp } } while (0); + if ( RT_FAILURE(rc) + && pDisk) + { + if (pDisk->hEventSemSyncIo != NIL_RTSEMEVENT) + RTSemEventDestroy(pDisk->hEventSemSyncIo); + if (pDisk->hMemCacheIoCtx != NIL_RTMEMCACHE) + RTMemCacheDestroy(pDisk->hMemCacheIoCtx); + if (pDisk->hMemCacheIoTask != NIL_RTMEMCACHE) + RTMemCacheDestroy(pDisk->hMemCacheIoTask); + } + LogFlowFunc(("returns %Rrc (pDisk=%#p)\n", rc, pDisk)); return rc; } @@ -5158,10 +5245,12 @@ VBOXDDU_DECL(int) VDDestroy(PVBOXHDD pDisk) /* sanity check */ AssertPtrBreak(pDisk); AssertMsg(pDisk->u32Signature == VBOXHDDDISK_SIGNATURE, ("u32Signature=%08x\n", pDisk->u32Signature)); + Assert(!pDisk->fLocked); + rc = VDCloseAll(pDisk); - RTCritSectDelete(&pDisk->CritSect); RTMemCacheDestroy(pDisk->hMemCacheIoCtx); RTMemCacheDestroy(pDisk->hMemCacheIoTask); + RTSemEventDestroy(pDisk->hEventSemSyncIo); RTMemFree(pDisk); } while (0); LogFlowFunc(("returns %Rrc\n", rc)); @@ -5225,14 +5314,11 @@ VBOXDDU_DECL(int) VDGetFormat(PVDINTERFACE pVDIfsDisk, PVDINTERFACE pVDIfsImage, VDIfIoInt.pfnGetModificationTime = vdIOIntGetModificationTimeLimited; VDIfIoInt.pfnGetSize = vdIOIntGetSizeLimited; VDIfIoInt.pfnSetSize = vdIOIntSetSizeLimited; - VDIfIoInt.pfnReadSync = vdIOIntReadSyncLimited; - VDIfIoInt.pfnWriteSync = vdIOIntWriteSyncLimited; - VDIfIoInt.pfnFlushSync = vdIOIntFlushSyncLimited; - VDIfIoInt.pfnReadUserAsync = NULL; - VDIfIoInt.pfnWriteUserAsync = NULL; - VDIfIoInt.pfnReadMetaAsync = NULL; - VDIfIoInt.pfnWriteMetaAsync = NULL; - VDIfIoInt.pfnFlushAsync = NULL; + VDIfIoInt.pfnReadUser = vdIOIntReadUserLimited; + VDIfIoInt.pfnWriteUser = 
vdIOIntWriteUserLimited; + VDIfIoInt.pfnReadMeta = vdIOIntReadMetaLimited; + VDIfIoInt.pfnWriteMeta = vdIOIntWriteMetaLimited; + VDIfIoInt.pfnFlush = vdIOIntFlushLimited; rc = VDInterfaceAdd(&VDIfIoInt.Core, "VD_IOINT", VDINTERFACETYPE_IOINT, pInterfaceIo, sizeof(VDINTERFACEIOINT), &pVDIfsImage); AssertRC(rc); @@ -5358,6 +5444,10 @@ VBOXDDU_DECL(int) VDOpen(PVBOXHDD pDisk, const char *pszBackend, AssertMsgBreakStmt((uOpenFlags & ~VD_OPEN_FLAGS_MASK) == 0, ("uOpenFlags=%#x\n", uOpenFlags), rc = VERR_INVALID_PARAMETER); + AssertMsgBreakStmt( !(uOpenFlags & VD_OPEN_FLAGS_SKIP_CONSISTENCY_CHECKS) + || (uOpenFlags & VD_OPEN_FLAGS_READONLY), + ("uOpenFlags=%#x\n", uOpenFlags), + rc = VERR_INVALID_PARAMETER); /* * Destroy the current discard state first which might still have pending blocks @@ -5450,6 +5540,36 @@ VBOXDDU_DECL(int) VDOpen(PVBOXHDD pDisk, const char *pszBackend, pImage->pVDIfsImage, pDisk->enmType, &pImage->pBackendData); + /* + * If the image is corrupted and there is a repair method try to repair it + * first if it was openend in read-write mode and open again afterwards. 
+ */ + if ( RT_UNLIKELY(rc == VERR_VD_IMAGE_CORRUPTED) + && !(uOpenFlags & VD_OPEN_FLAGS_READONLY) + && pImage->Backend->pfnRepair) + { + rc = pImage->Backend->pfnRepair(pszFilename, pDisk->pVDIfsDisk, pImage->pVDIfsImage, 0 /* fFlags */); + if (RT_SUCCESS(rc)) + rc = pImage->Backend->pfnOpen(pImage->pszFilename, + uOpenFlags & ~(VD_OPEN_FLAGS_HONOR_SAME | VD_OPEN_FLAGS_IGNORE_FLUSH | VD_OPEN_FLAGS_INFORM_ABOUT_ZERO_BLOCKS), + pDisk->pVDIfsDisk, + pImage->pVDIfsImage, + pDisk->enmType, + &pImage->pBackendData); + else + { + rc = vdError(pDisk, rc, RT_SRC_POS, + N_("VD: error %Rrc repairing corrupted image file '%s'"), rc, pszFilename); + break; + } + } + else if (RT_UNLIKELY(rc == VERR_VD_IMAGE_CORRUPTED)) + { + rc = vdError(pDisk, rc, RT_SRC_POS, + N_("VD: Image file '%s' is corrupted and can't be opened"), pszFilename); + break; + } + /* If the open in read-write mode failed, retry in read-only mode. */ if (RT_FAILURE(rc)) { @@ -6619,7 +6739,11 @@ VBOXDDU_DECL(int) VDMerge(PVBOXHDD pDisk, unsigned nImageFrom, unsigned uOpenFlags = pImageTo->Backend->pfnGetOpenFlags(pImageTo->pBackendData); if (uOpenFlags & VD_OPEN_FLAGS_READONLY) { - uOpenFlags &= ~VD_OPEN_FLAGS_READONLY; + /* + * Clear skip consistency checks because the image is made writable now and + * skipping consistency checks is only possible for readonly images. 
+ */ + uOpenFlags &= ~(VD_OPEN_FLAGS_READONLY | VD_OPEN_FLAGS_SKIP_CONSISTENCY_CHECKS); rc = pImageTo->Backend->pfnSetOpenFlags(pImageTo->pBackendData, uOpenFlags); if (RT_FAILURE(rc)) @@ -6652,6 +6776,15 @@ VBOXDDU_DECL(int) VDMerge(PVBOXHDD pDisk, unsigned nImageFrom, do { size_t cbThisRead = RT_MIN(VD_MERGE_BUFFER_SIZE, cbRemaining); + RTSGSEG SegmentBuf; + RTSGBUF SgBuf; + VDIOCTX IoCtx; + + SegmentBuf.pvSeg = pvBuf; + SegmentBuf.cbSeg = VD_MERGE_BUFFER_SIZE; + RTSgBufInit(&SgBuf, &SegmentBuf, 1); + vdIoCtxInit(&IoCtx, pDisk, VDIOCTXTXDIR_READ, 0, 0, NULL, + &SgBuf, NULL, NULL, VDIOCTX_FLAGS_SYNC); /* Need to hold the write lock during a read-write operation. */ rc2 = vdThreadStartWrite(pDisk); @@ -6659,8 +6792,8 @@ VBOXDDU_DECL(int) VDMerge(PVBOXHDD pDisk, unsigned nImageFrom, fLockWrite = true; rc = pImageTo->Backend->pfnRead(pImageTo->pBackendData, - uOffset, pvBuf, cbThisRead, - &cbThisRead); + uOffset, cbThisRead, + &IoCtx, &cbThisRead); if (rc == VERR_VD_BLOCK_FREE) { /* Search for image with allocated block. Do not attempt to @@ -6672,9 +6805,8 @@ VBOXDDU_DECL(int) VDMerge(PVBOXHDD pDisk, unsigned nImageFrom, pCurrImage = pCurrImage->pPrev) { rc = pCurrImage->Backend->pfnRead(pCurrImage->pBackendData, - uOffset, pvBuf, - cbThisRead, - &cbThisRead); + uOffset, cbThisRead, + &IoCtx, &cbThisRead); } if (rc != VERR_VD_BLOCK_FREE) @@ -6684,7 +6816,7 @@ VBOXDDU_DECL(int) VDMerge(PVBOXHDD pDisk, unsigned nImageFrom, /* Updating the cache is required because this might be a live merge. 
*/ rc = vdWriteHelperEx(pDisk, pImageTo, pImageFrom->pPrev, uOffset, pvBuf, cbThisRead, - true /* fUpdateCache */, 0); + VDIOCTX_FLAGS_READ_UPDATE_CACHE, 0); if (RT_FAILURE(rc)) break; } @@ -6773,8 +6905,18 @@ VBOXDDU_DECL(int) VDMerge(PVBOXHDD pDisk, unsigned nImageFrom, do { size_t cbThisRead = RT_MIN(VD_MERGE_BUFFER_SIZE, cbRemaining); + RTSGSEG SegmentBuf; + RTSGBUF SgBuf; + VDIOCTX IoCtx; + rc = VERR_VD_BLOCK_FREE; + SegmentBuf.pvSeg = pvBuf; + SegmentBuf.cbSeg = VD_MERGE_BUFFER_SIZE; + RTSgBufInit(&SgBuf, &SegmentBuf, 1); + vdIoCtxInit(&IoCtx, pDisk, VDIOCTXTXDIR_READ, 0, 0, NULL, + &SgBuf, NULL, NULL, VDIOCTX_FLAGS_SYNC); + /* Need to hold the write lock during a read-write operation. */ rc2 = vdThreadStartWrite(pDisk); AssertRC(rc2); @@ -6789,8 +6931,8 @@ VBOXDDU_DECL(int) VDMerge(PVBOXHDD pDisk, unsigned nImageFrom, pCurrImage = pCurrImage->pPrev) { rc = pCurrImage->Backend->pfnRead(pCurrImage->pBackendData, - uOffset, pvBuf, - cbThisRead, &cbThisRead); + uOffset, cbThisRead, + &IoCtx, &cbThisRead); } if (rc != VERR_VD_BLOCK_FREE) @@ -6798,7 +6940,7 @@ VBOXDDU_DECL(int) VDMerge(PVBOXHDD pDisk, unsigned nImageFrom, if (RT_FAILURE(rc)) break; rc = vdWriteHelper(pDisk, pImageTo, uOffset, pvBuf, - cbThisRead, true /* fUpdateCache */); + cbThisRead, VDIOCTX_FLAGS_READ_UPDATE_CACHE); if (RT_FAILURE(rc)) break; } @@ -6853,15 +6995,27 @@ VBOXDDU_DECL(int) VDMerge(PVBOXHDD pDisk, unsigned nImageFrom, AssertRC(rc2); fLockWrite = true; - /* Update parent UUID so that image chain is consistent. */ + /* Update parent UUID so that image chain is consistent. + * The two attempts work around the problem that some backends + * (e.g. iSCSI) do not support UUIDs, so we exploit the fact that + * so far there can only be one such image in the chain. 
*/ + /** @todo needs a better long-term solution, passing the UUID + * knowledge from the caller or some such */ RTUUID Uuid; PVDIMAGE pImageChild = NULL; if (nImageFrom < nImageTo) { if (pImageFrom->pPrev) { + /* plan A: ask the parent itself for its UUID */ rc = pImageFrom->pPrev->Backend->pfnGetUuid(pImageFrom->pPrev->pBackendData, &Uuid); + if (RT_FAILURE(rc)) + { + /* plan B: ask the child of the parent for parent UUID */ + rc = pImageFrom->Backend->pfnGetParentUuid(pImageFrom->pBackendData, + &Uuid); + } AssertRC(rc); } else @@ -6875,8 +7029,15 @@ VBOXDDU_DECL(int) VDMerge(PVBOXHDD pDisk, unsigned nImageFrom, /* Update the parent uuid of the child of the last merged image. */ if (pImageFrom->pNext) { + /* plan A: ask the parent itself for its UUID */ rc = pImageTo->Backend->pfnGetUuid(pImageTo->pBackendData, &Uuid); + if (RT_FAILURE(rc)) + { + /* plan B: ask the child of the parent for parent UUID */ + rc = pImageTo->pNext->Backend->pfnGetParentUuid(pImageTo->pNext->pBackendData, + &Uuid); + } AssertRC(rc); rc = pImageFrom->Backend->pfnSetParentUuid(pImageFrom->pNext->pBackendData, @@ -7486,11 +7647,11 @@ VBOXDDU_DECL(int) VDResize(PVBOXHDD pDisk, uint64_t cbSize, AssertRC(rc2); fLockRead = true; - /* Not supported if the disk has child images attached. */ - AssertMsgBreakStmt(pDisk->cImages == 1, ("cImages=%u\n", pDisk->cImages), + /* Must have at least one image in the chain, will resize last. */ + AssertMsgBreakStmt(pDisk->cImages >= 1, ("cImages=%u\n", pDisk->cImages), rc = VERR_NOT_SUPPORTED); - PVDIMAGE pImage = pDisk->pBase; + PVDIMAGE pImage = pDisk->pLast; /* If there is no compact callback for not file based backends then * the backend doesn't need compaction. 
No need to make much fuss about @@ -7571,6 +7732,8 @@ VBOXDDU_DECL(int) VDResize(PVBOXHDD pDisk, uint64_t cbSize, { if (pIfProgress && pIfProgress->pfnProgress) pIfProgress->pfnProgress(pIfProgress->Core.pvUser, 100); + + pDisk->cbSize = cbSize; } LogFlowFunc(("returns %Rrc\n", rc)); @@ -7839,14 +8002,23 @@ VBOXDDU_DECL(int) VDRead(PVBOXHDD pDisk, uint64_t uOffset, void *pvBuf, AssertRC(rc2); fLockRead = true; - AssertMsgBreakStmt(uOffset + cbRead <= pDisk->cbSize, - ("uOffset=%llu cbRead=%zu pDisk->cbSize=%llu\n", - uOffset, cbRead, pDisk->cbSize), - rc = VERR_INVALID_PARAMETER); - PVDIMAGE pImage = pDisk->pLast; AssertPtrBreakStmt(pImage, rc = VERR_VD_NOT_OPENED); + if (uOffset + cbRead > pDisk->cbSize) + { + /* Floppy images might be smaller than the standard expected by + the floppy controller code. So, we won't fail here. */ + AssertMsgBreakStmt(pDisk->enmType == VDTYPE_FLOPPY, + ("uOffset=%llu cbRead=%zu pDisk->cbSize=%llu\n", + uOffset, cbRead, pDisk->cbSize), + rc = VERR_EOF); + memset(pvBuf, 0xf6, cbRead); /* f6h = format.com filler byte */ + if (uOffset >= pDisk->cbSize) + break; + cbRead = pDisk->cbSize - uOffset; + } + rc = vdReadHelper(pDisk, pImage, uOffset, pvBuf, cbRead, true /* fUpdateCache */); } while (0); @@ -7909,7 +8081,7 @@ VBOXDDU_DECL(int) VDWrite(PVBOXHDD pDisk, uint64_t uOffset, const void *pvBuf, vdSetModifiedFlag(pDisk); rc = vdWriteHelper(pDisk, pImage, uOffset, pvBuf, cbWrite, - true /* fUpdateCache */); + VDIOCTX_FLAGS_READ_UPDATE_CACHE); if (RT_FAILURE(rc)) break; @@ -7923,7 +8095,7 @@ VBOXDDU_DECL(int) VDWrite(PVBOXHDD pDisk, uint64_t uOffset, const void *pvBuf, * as this write is covered by the previous one. 
*/ if (RT_UNLIKELY(pDisk->pImageRelay)) rc = vdWriteHelper(pDisk, pDisk->pImageRelay, uOffset, - pvBuf, cbWrite, false /* fUpdateCache */); + pvBuf, cbWrite, VDIOCTX_FLAGS_DEFAULT); } while (0); if (RT_UNLIKELY(fLockWrite)) @@ -7963,12 +8135,19 @@ VBOXDDU_DECL(int) VDFlush(PVBOXHDD pDisk) PVDIMAGE pImage = pDisk->pLast; AssertPtrBreakStmt(pImage, rc = VERR_VD_NOT_OPENED); - vdResetModifiedFlag(pDisk); - rc = pImage->Backend->pfnFlush(pImage->pBackendData); + PVDIOCTX pIoCtx = vdIoCtxRootAlloc(pDisk, VDIOCTXTXDIR_FLUSH, 0, + 0, pDisk->pLast, NULL, + vdIoCtxSyncComplete, pDisk, NULL, + NULL, vdFlushHelperAsync, + VDIOCTX_FLAGS_SYNC); - if ( RT_SUCCESS(rc) - && pDisk->pCache) - rc = pDisk->pCache->Backend->pfnFlush(pDisk->pCache->pBackendData); + if (!pIoCtx) + { + rc = VERR_NO_MEMORY; + break; + } + + rc = vdIoCtxProcessSync(pIoCtx); } while (0); if (RT_UNLIKELY(fLockWrite)) @@ -8060,6 +8239,46 @@ VBOXDDU_DECL(bool) VDIsReadOnly(PVBOXHDD pDisk) } /** + * Get sector size of an image in HDD container. + * + * @return Virtual disk sector size in bytes. + * @return 0 if image with specified number was not opened. + * @param pDisk Pointer to HDD container. + * @param nImage Image number, counts from 0. 0 is always base image of container. 
+ */ +VBOXDDU_DECL(uint32_t) VDGetSectorSize(PVBOXHDD pDisk, unsigned nImage) +{ + uint64_t cbSector; + int rc2; + bool fLockRead = false; + + LogFlowFunc(("pDisk=%#p nImage=%u\n", pDisk, nImage)); + do + { + /* sanity check */ + AssertPtrBreakStmt(pDisk, cbSector = 0); + AssertMsg(pDisk->u32Signature == VBOXHDDDISK_SIGNATURE, ("u32Signature=%08x\n", pDisk->u32Signature)); + + rc2 = vdThreadStartRead(pDisk); + AssertRC(rc2); + fLockRead = true; + + PVDIMAGE pImage = vdGetImageByNumber(pDisk, nImage); + AssertPtrBreakStmt(pImage, cbSector = 0); + cbSector = pImage->Backend->pfnGetSectorSize(pImage->pBackendData); + } while (0); + + if (RT_UNLIKELY(fLockRead)) + { + rc2 = vdThreadFinishRead(pDisk); + AssertRC(rc2); + } + + LogFlowFunc(("returns %u\n", cbSector)); + return cbSector; +} + +/** * Get total capacity of an image in HDD container. * * @returns Virtual disk size in bytes. @@ -9282,8 +9501,17 @@ VBOXDDU_DECL(int) VDDiscardRanges(PVBOXHDD pDisk, PCRTRANGE paRanges, unsigned c ("Discarding not supported\n"), rc = VERR_NOT_SUPPORTED); - vdSetModifiedFlag(pDisk); - rc = vdDiscardHelper(pDisk, paRanges, cRanges); + PVDIOCTX pIoCtx = vdIoCtxDiscardAlloc(pDisk, paRanges, cRanges, + vdIoCtxSyncComplete, pDisk, NULL, NULL, + vdDiscardHelperAsync, + VDIOCTX_FLAGS_SYNC); + if (!pIoCtx) + { + rc = VERR_NO_MEMORY; + break; + } + + rc = vdIoCtxProcessSync(pIoCtx); } while (0); if (RT_UNLIKELY(fLockWrite)) @@ -9337,18 +9565,15 @@ VBOXDDU_DECL(int) VDAsyncRead(PVBOXHDD pDisk, uint64_t uOffset, size_t cbRead, pIoCtx = vdIoCtxRootAlloc(pDisk, VDIOCTXTXDIR_READ, uOffset, cbRead, pDisk->pLast, pcSgBuf, pfnComplete, pvUser1, pvUser2, - NULL, vdReadHelperAsync); + NULL, vdReadHelperAsync, + VDIOCTX_FLAGS_ZERO_FREE_BLOCKS); if (!pIoCtx) { rc = VERR_NO_MEMORY; break; } -#if 0 rc = vdIoCtxProcessTryLockDefer(pIoCtx); -#else - rc = vdIoCtxProcess(pIoCtx); -#endif if (rc == VINF_VD_ASYNC_IO_FINISHED) { if (ASMAtomicCmpXchgBool(&pIoCtx->fComplete, true, false)) @@ -9412,18 +9637,15 @@ 
VBOXDDU_DECL(int) VDAsyncWrite(PVBOXHDD pDisk, uint64_t uOffset, size_t cbWrite, pIoCtx = vdIoCtxRootAlloc(pDisk, VDIOCTXTXDIR_WRITE, uOffset, cbWrite, pDisk->pLast, pcSgBuf, pfnComplete, pvUser1, pvUser2, - NULL, vdWriteHelperAsync); + NULL, vdWriteHelperAsync, + VDIOCTX_FLAGS_DEFAULT); if (!pIoCtx) { rc = VERR_NO_MEMORY; break; } -#if 0 rc = vdIoCtxProcessTryLockDefer(pIoCtx); -#else - rc = vdIoCtxProcess(pIoCtx); -#endif if (rc == VINF_VD_ASYNC_IO_FINISHED) { if (ASMAtomicCmpXchgBool(&pIoCtx->fComplete, true, false)) @@ -9472,18 +9694,15 @@ VBOXDDU_DECL(int) VDAsyncFlush(PVBOXHDD pDisk, PFNVDASYNCTRANSFERCOMPLETE pfnCom pIoCtx = vdIoCtxRootAlloc(pDisk, VDIOCTXTXDIR_FLUSH, 0, 0, pDisk->pLast, NULL, pfnComplete, pvUser1, pvUser2, - NULL, vdFlushHelperAsync); + NULL, vdFlushHelperAsync, + VDIOCTX_FLAGS_DEFAULT); if (!pIoCtx) { rc = VERR_NO_MEMORY; break; } -#if 0 rc = vdIoCtxProcessTryLockDefer(pIoCtx); -#else - rc = vdIoCtxProcess(pIoCtx); -#endif if (rc == VINF_VD_ASYNC_IO_FINISHED) { if (ASMAtomicCmpXchgBool(&pIoCtx->fComplete, true, false)) @@ -9531,18 +9750,15 @@ VBOXDDU_DECL(int) VDAsyncDiscardRanges(PVBOXHDD pDisk, PCRTRANGE paRanges, unsig pIoCtx = vdIoCtxDiscardAlloc(pDisk, paRanges, cRanges, pfnComplete, pvUser1, pvUser2, NULL, - vdDiscardHelperAsync); + vdDiscardHelperAsync, + VDIOCTX_FLAGS_DEFAULT); if (!pIoCtx) { rc = VERR_NO_MEMORY; break; } -#if 0 rc = vdIoCtxProcessTryLockDefer(pIoCtx); -#else - rc = vdIoCtxProcess(pIoCtx); -#endif if (rc == VINF_VD_ASYNC_IO_FINISHED) { if (ASMAtomicCmpXchgBool(&pIoCtx->fComplete, true, false)) @@ -9608,14 +9824,11 @@ VBOXDDU_DECL(int) VDRepair(PVDINTERFACE pVDIfsDisk, PVDINTERFACE pVDIfsImage, VDIfIoInt.pfnGetModificationTime = vdIOIntGetModificationTimeLimited; VDIfIoInt.pfnGetSize = vdIOIntGetSizeLimited; VDIfIoInt.pfnSetSize = vdIOIntSetSizeLimited; - VDIfIoInt.pfnReadSync = vdIOIntReadSyncLimited; - VDIfIoInt.pfnWriteSync = vdIOIntWriteSyncLimited; - VDIfIoInt.pfnFlushSync = vdIOIntFlushSyncLimited; - 
VDIfIoInt.pfnReadUserAsync = NULL; - VDIfIoInt.pfnWriteUserAsync = NULL; - VDIfIoInt.pfnReadMetaAsync = NULL; - VDIfIoInt.pfnWriteMetaAsync = NULL; - VDIfIoInt.pfnFlushAsync = NULL; + VDIfIoInt.pfnReadUser = vdIOIntReadUserLimited; + VDIfIoInt.pfnWriteUser = vdIOIntWriteUserLimited; + VDIfIoInt.pfnReadMeta = vdIOIntReadMetaLimited; + VDIfIoInt.pfnWriteMeta = vdIOIntWriteMetaLimited; + VDIfIoInt.pfnFlush = vdIOIntFlushLimited; rc = VDInterfaceAdd(&VDIfIoInt.Core, "VD_IOINT", VDINTERFACETYPE_IOINT, pInterfaceIo, sizeof(VDINTERFACEIOINT), &pVDIfsImage); AssertRC(rc); |