diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2020-12-18 12:50:18 -0800 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2020-12-18 12:50:18 -0800 |
commit | a0b96314870f7eff6d15a242cb162dfc46b3c284 (patch) | |
tree | a78c32561160ac99a835ee1d28d4a33c4332be55 /fs | |
parent | 4862c741bd440813cabc5e93351f0950c1cb19d9 (diff) | |
parent | e82226138b20d4f638426413e83c6b5db532c6a2 (diff) | |
download | linux-a0b96314870f7eff6d15a242cb162dfc46b3c284.tar.gz |
Merge tag 'xfs-5.11-merge-4' of git://git.kernel.org/pub/scm/fs/xfs/xfs-linux
Pull xfs updates from Darrick Wong:
"In this release we add the ability to set a 'needsrepair' flag
indicating that we /know/ the filesystem requires xfs_repair, but
other than that, it's the usual strengthening of metadata validation
and miscellaneous cleanups.
Summary:
- Introduce a "needsrepair" "feature" to flag a filesystem as needing
a pass through xfs_repair. This is key to enabling filesystem
upgrades (in xfs_db) that require xfs_repair to make minor
adjustments to metadata.
- Refactor parameter checking of recovered log intent items so that
we actually use the same validation code as them that generate the
intent items.
- Various fixes to online scrub not reacting correctly to directory
entries pointing to inodes that cannot be igetted.
- Refactor validation helpers for data and rt volume extents.
- Refactor XFS_TRANS_DQ_DIRTY out of existence.
- Fix a longstanding bug where mounting with "uqnoenforce" would
start user quotas in non-enforcing mode but /proc/mounts would
display "usrquota", implying that they are being enforced.
- Don't flag dax+reflink inodes as corruption since that is a valid
(but not fully functional) combination right now.
- Clean up raid stripe validation functions.
- Refactor the inode allocation code to be more straightforward.
- Small prep cleanup for idmapping support.
- Get rid of the xfs_buf_t typedef"
* tag 'xfs-5.11-merge-4' of git://git.kernel.org/pub/scm/fs/xfs/xfs-linux: (40 commits)
xfs: remove xfs_buf_t typedef
fs/xfs: convert comma to semicolon
xfs: open code updating i_mode in xfs_set_acl
xfs: remove xfs_vn_setattr_nonsize
xfs: kill ialloced in xfs_dialloc()
xfs: spilt xfs_dialloc() into 2 functions
xfs: move xfs_dialloc_roll() into xfs_dialloc()
xfs: move on-disk inode allocation out of xfs_ialloc()
xfs: introduce xfs_dialloc_roll()
xfs: convert noroom, okalloc in xfs_dialloc() to bool
xfs: don't catch dax+reflink inodes as corruption in verifier
xfs: fix the forward progress assertion in xfs_iwalk_run_callbacks
xfs: remove unneeded return value check for *init_cursor()
xfs: introduce xfs_validate_stripe_geometry()
xfs: show the proper user quota options
xfs: remove the unused XFS_B_FSB_OFFSET macro
xfs: remove unnecessary null check in xfs_generic_create
xfs: directly return if the delta equal to zero
xfs: check tp->t_dqinfo value instead of the XFS_TRANS_DQ_DIRTY flag
xfs: delete duplicated tp->t_dqinfo null check and allocation
...
Diffstat (limited to 'fs')
48 files changed, 695 insertions, 705 deletions
diff --git a/fs/xfs/libxfs/xfs_alloc.c b/fs/xfs/libxfs/xfs_alloc.c index 15640015be9d..7cb9f064ac64 100644 --- a/fs/xfs/libxfs/xfs_alloc.c +++ b/fs/xfs/libxfs/xfs_alloc.c @@ -690,9 +690,9 @@ xfs_alloc_read_agfl( xfs_mount_t *mp, /* mount point structure */ xfs_trans_t *tp, /* transaction pointer */ xfs_agnumber_t agno, /* allocation group number */ - xfs_buf_t **bpp) /* buffer for the ag free block array */ + struct xfs_buf **bpp) /* buffer for the ag free block array */ { - xfs_buf_t *bp; /* return value */ + struct xfs_buf *bp; /* return value */ int error; ASSERT(agno != NULLAGNUMBER); @@ -2647,12 +2647,12 @@ out_no_agbp: int /* error */ xfs_alloc_get_freelist( xfs_trans_t *tp, /* transaction pointer */ - xfs_buf_t *agbp, /* buffer containing the agf structure */ + struct xfs_buf *agbp, /* buffer containing the agf structure */ xfs_agblock_t *bnop, /* block address retrieved from freelist */ int btreeblk) /* destination is a AGF btree */ { struct xfs_agf *agf = agbp->b_addr; - xfs_buf_t *agflbp;/* buffer for a.g. freelist structure */ + struct xfs_buf *agflbp;/* buffer for a.g. freelist structure */ xfs_agblock_t bno; /* block number returned */ __be32 *agfl_bno; int error; @@ -2711,7 +2711,7 @@ xfs_alloc_get_freelist( void xfs_alloc_log_agf( xfs_trans_t *tp, /* transaction pointer */ - xfs_buf_t *bp, /* buffer for a.g. freelist header */ + struct xfs_buf *bp, /* buffer for a.g. freelist header */ int fields) /* mask of fields to be logged (XFS_AGF_...) */ { int first; /* first byte offset */ @@ -2757,7 +2757,7 @@ xfs_alloc_pagf_init( xfs_agnumber_t agno, /* allocation group number */ int flags) /* XFS_ALLOC_FLAGS_... */ { - xfs_buf_t *bp; + struct xfs_buf *bp; int error; error = xfs_alloc_read_agf(mp, tp, agno, flags, &bp); @@ -2772,8 +2772,8 @@ xfs_alloc_pagf_init( int /* error */ xfs_alloc_put_freelist( xfs_trans_t *tp, /* transaction pointer */ - xfs_buf_t *agbp, /* buffer for a.g. freelist header */ - xfs_buf_t *agflbp,/* buffer for a.g. free block array */ + struct xfs_buf *agbp, /* buffer for a.g. freelist header */ + struct xfs_buf *agflbp,/* buffer for a.g. free block array */ xfs_agblock_t bno, /* block being freed */ int btreeblk) /* block came from a AGF btree */ { diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c index d9a692484eae..bc446418e227 100644 --- a/fs/xfs/libxfs/xfs_bmap.c +++ b/fs/xfs/libxfs/xfs_bmap.c @@ -321,7 +321,7 @@ xfs_bmap_check_leaf_extents( struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, whichfork); struct xfs_btree_block *block; /* current btree block */ xfs_fsblock_t bno; /* block # of "block" */ - xfs_buf_t *bp; /* buffer for "block" */ + struct xfs_buf *bp; /* buffer for "block" */ int error; /* error return value */ xfs_extnum_t i=0, j; /* index into the extents list */ int level; /* btree level, for checking */ @@ -592,7 +592,7 @@ xfs_bmap_btree_to_extents( struct xfs_btree_block *rblock = ifp->if_broot; struct xfs_btree_block *cblock;/* child btree block */ xfs_fsblock_t cbno; /* child block number */ - xfs_buf_t *cbp; /* child block's buffer */ + struct xfs_buf *cbp; /* child block's buffer */ int error; /* error return value */ __be64 *pp; /* ptr to block address */ struct xfs_owner_info oinfo; @@ -830,7 +830,7 @@ xfs_bmap_local_to_extents( int flags; /* logging flags returned */ struct xfs_ifork *ifp; /* inode fork pointer */ xfs_alloc_arg_t args; /* allocation arguments */ - xfs_buf_t *bp; /* buffer for extent block */ + struct xfs_buf *bp; /* buffer for extent block */ struct xfs_bmbt_irec rec; struct xfs_iext_cursor icur; @@ -6226,23 +6226,17 @@ xfs_bmap_validate_extent( struct xfs_bmbt_irec *irec) { struct xfs_mount *mp = ip->i_mount; - xfs_fsblock_t endfsb; - bool isrt; - isrt = XFS_IS_REALTIME_INODE(ip); - endfsb = irec->br_startblock + irec->br_blockcount - 1; - if (isrt && whichfork == XFS_DATA_FORK) { - if (!xfs_verify_rtbno(mp, irec->br_startblock)) - return __this_address; - if (!xfs_verify_rtbno(mp, endfsb)) + if (!xfs_verify_fileext(mp, irec->br_startoff, irec->br_blockcount)) + return __this_address; + + if (XFS_IS_REALTIME_INODE(ip) && whichfork == XFS_DATA_FORK) { + if (!xfs_verify_rtext(mp, irec->br_startblock, + irec->br_blockcount)) return __this_address; } else { - if (!xfs_verify_fsbno(mp, irec->br_startblock)) - return __this_address; - if (!xfs_verify_fsbno(mp, endfsb)) - return __this_address; - if (XFS_FSB_TO_AGNO(mp, irec->br_startblock) != - XFS_FSB_TO_AGNO(mp, endfsb)) + if (!xfs_verify_fsbext(mp, irec->br_startblock, + irec->br_blockcount)) return __this_address; } if (irec->br_state != XFS_EXT_NORM && whichfork != XFS_DATA_FORK) diff --git a/fs/xfs/libxfs/xfs_bmap_btree.c b/fs/xfs/libxfs/xfs_bmap_btree.c index ecec604e6e4d..976659190d27 100644 --- a/fs/xfs/libxfs/xfs_bmap_btree.c +++ b/fs/xfs/libxfs/xfs_bmap_btree.c @@ -639,8 +639,6 @@ xfs_bmbt_change_owner( ASSERT(XFS_IFORK_PTR(ip, whichfork)->if_format == XFS_DINODE_FMT_BTREE); cur = xfs_bmbt_init_cursor(ip->i_mount, tp, ip, whichfork); - if (!cur) - return -ENOMEM; cur->bc_ino.flags |= XFS_BTCUR_BMBT_INVALID_OWNER; error = xfs_btree_change_owner(cur, new_owner, buffer_list); diff --git a/fs/xfs/libxfs/xfs_btree.c b/fs/xfs/libxfs/xfs_btree.c index 2d25bab68764..c4d7a9241dc3 100644 --- a/fs/xfs/libxfs/xfs_btree.c +++ b/fs/xfs/libxfs/xfs_btree.c @@ -397,7 +397,7 @@ xfs_btree_dup_cursor( xfs_btree_cur_t *cur, /* input cursor */ xfs_btree_cur_t **ncur) /* output cursor */ { - xfs_buf_t *bp; /* btree block's buffer pointer */ + struct xfs_buf *bp; /* btree block's buffer pointer */ int error; /* error return value */ int i; /* level number of btree block */ xfs_mount_t *mp; /* mount structure for filesystem */ @@ -701,7 +701,7 @@ xfs_btree_firstrec( int level) /* level to change */ { struct xfs_btree_block *block; /* generic btree block pointer */ - xfs_buf_t *bp; /* buffer containing block */ + struct xfs_buf *bp; /* buffer containing block */ /* * Get the block pointer for this level. @@ -731,7 +731,7 @@ xfs_btree_lastrec( int level) /* level to change */ { struct xfs_btree_block *block; /* generic btree block pointer */ - xfs_buf_t *bp; /* buffer containing block */ + struct xfs_buf *bp; /* buffer containing block */ /* * Get the block pointer for this level. @@ -993,7 +993,7 @@ STATIC void xfs_btree_setbuf( xfs_btree_cur_t *cur, /* btree cursor */ int lev, /* level in btree */ - xfs_buf_t *bp) /* new buffer to set */ + struct xfs_buf *bp) /* new buffer to set */ { struct xfs_btree_block *b; /* btree block */ @@ -1636,7 +1636,7 @@ xfs_btree_decrement( int *stat) /* success/failure */ { struct xfs_btree_block *block; - xfs_buf_t *bp; + struct xfs_buf *bp; int error; /* error return value */ int lev; union xfs_btree_ptr ptr; @@ -4070,7 +4070,7 @@ xfs_btree_delrec( * surviving block, and log it. */ xfs_btree_set_numrecs(left, lrecs + rrecs); - xfs_btree_get_sibling(cur, right, &cptr, XFS_BB_RIGHTSIB), + xfs_btree_get_sibling(cur, right, &cptr, XFS_BB_RIGHTSIB); xfs_btree_set_sibling(cur, left, &cptr, XFS_BB_RIGHTSIB); xfs_btree_log_block(cur, lbp, XFS_BB_NUMRECS | XFS_BB_RIGHTSIB); diff --git a/fs/xfs/libxfs/xfs_format.h b/fs/xfs/libxfs/xfs_format.h index dd764da08f6f..630388b72dbe 100644 --- a/fs/xfs/libxfs/xfs_format.h +++ b/fs/xfs/libxfs/xfs_format.h @@ -468,11 +468,13 @@ xfs_sb_has_ro_compat_feature( #define XFS_SB_FEAT_INCOMPAT_SPINODES (1 << 1) /* sparse inode chunks */ #define XFS_SB_FEAT_INCOMPAT_META_UUID (1 << 2) /* metadata UUID */ #define XFS_SB_FEAT_INCOMPAT_BIGTIME (1 << 3) /* large timestamps */ +#define XFS_SB_FEAT_INCOMPAT_NEEDSREPAIR (1 << 4) /* needs xfs_repair */ #define XFS_SB_FEAT_INCOMPAT_ALL \ (XFS_SB_FEAT_INCOMPAT_FTYPE| \ XFS_SB_FEAT_INCOMPAT_SPINODES| \ XFS_SB_FEAT_INCOMPAT_META_UUID| \ - XFS_SB_FEAT_INCOMPAT_BIGTIME) + XFS_SB_FEAT_INCOMPAT_BIGTIME| \ + XFS_SB_FEAT_INCOMPAT_NEEDSREPAIR) #define XFS_SB_FEAT_INCOMPAT_UNKNOWN ~XFS_SB_FEAT_INCOMPAT_ALL static inline bool @@ -584,6 +586,12 @@ static inline bool xfs_sb_version_hasinobtcounts(struct xfs_sb *sbp) (sbp->sb_features_ro_compat & XFS_SB_FEAT_RO_COMPAT_INOBTCNT); } +static inline bool xfs_sb_version_needsrepair(struct xfs_sb *sbp) +{ + return XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5 && + (sbp->sb_features_incompat & XFS_SB_FEAT_INCOMPAT_NEEDSREPAIR); +} + /* * end of superblock version macros */ @@ -625,7 +633,6 @@ xfs_is_quota_inode(struct xfs_sb *sbp, xfs_ino_t ino) #define XFS_B_TO_FSB(mp,b) \ ((((uint64_t)(b)) + (mp)->m_blockmask) >> (mp)->m_sb.sb_blocklog) #define XFS_B_TO_FSBT(mp,b) (((uint64_t)(b)) >> (mp)->m_sb.sb_blocklog) -#define XFS_B_FSB_OFFSET(mp,b) ((b) & (mp)->m_blockmask) /* * Allocation group header diff --git a/fs/xfs/libxfs/xfs_ialloc.c b/fs/xfs/libxfs/xfs_ialloc.c index 974e71bc4a3a..69b228fce81a 100644 --- a/fs/xfs/libxfs/xfs_ialloc.c +++ b/fs/xfs/libxfs/xfs_ialloc.c @@ -607,13 +607,13 @@ error: /* * Allocate new inodes in the allocation group specified by agbp. - * Return 0 for success, else error code. + * Returns 0 if inodes were allocated in this AG; 1 if there was no space + * in this AG; or the usual negative error code. */ STATIC int xfs_ialloc_ag_alloc( struct xfs_trans *tp, - struct xfs_buf *agbp, - int *alloc) + struct xfs_buf *agbp) { struct xfs_agi *agi; struct xfs_alloc_arg args; @@ -795,10 +795,9 @@ sparse_alloc: allocmask = (1 << (newlen / XFS_INODES_PER_HOLEMASK_BIT)) - 1; } - if (args.fsbno == NULLFSBLOCK) { - *alloc = 0; - return 0; - } + if (args.fsbno == NULLFSBLOCK) + return 1; + ASSERT(args.len == args.minlen); /* @@ -903,7 +902,6 @@ sparse_alloc: */ xfs_trans_mod_sb(tp, XFS_TRANS_SB_ICOUNT, (long)newlen); xfs_trans_mod_sb(tp, XFS_TRANS_SB_IFREE, (long)newlen); - *alloc = 1; return 0; } @@ -1570,7 +1568,7 @@ xfs_dialloc_ag_update_inobt( * The caller selected an AG for us, and made sure that free inodes are * available. */ -STATIC int +int xfs_dialloc_ag( struct xfs_trans *tp, struct xfs_buf *agbp, @@ -1682,65 +1680,78 @@ error_cur: return error; } +static int +xfs_dialloc_roll( + struct xfs_trans **tpp, + struct xfs_buf *agibp) +{ + struct xfs_trans *tp = *tpp; + struct xfs_dquot_acct *dqinfo; + int error; + + /* + * Hold to on to the agibp across the commit so no other allocation can + * come in and take the free inodes we just allocated for our caller. + */ + xfs_trans_bhold(tp, agibp); + + /* + * We want the quota changes to be associated with the next transaction, + * NOT this one. So, detach the dqinfo from this and attach it to the + * next transaction. + */ + dqinfo = tp->t_dqinfo; + tp->t_dqinfo = NULL; + + error = xfs_trans_roll(&tp); + + /* Re-attach the quota info that we detached from prev trx. */ + tp->t_dqinfo = dqinfo; + + *tpp = tp; + if (error) + return error; + xfs_trans_bjoin(tp, agibp); + return 0; +} + /* - * Allocate an inode on disk. - * - * Mode is used to tell whether the new inode will need space, and whether it - * is a directory. + * Select and prepare an AG for inode allocation. * - * This function is designed to be called twice if it has to do an allocation - * to make more free inodes. On the first call, *IO_agbp should be set to NULL. - * If an inode is available without having to performn an allocation, an inode - * number is returned. In this case, *IO_agbp is set to NULL. If an allocation - * needs to be done, xfs_dialloc returns the current AGI buffer in *IO_agbp. - * The caller should then commit the current transaction, allocate a - * new transaction, and call xfs_dialloc() again, passing in the previous value - * of *IO_agbp. IO_agbp should be held across the transactions. Since the AGI - * buffer is locked across the two calls, the second call is guaranteed to have - * a free inode available. + * Mode is used to tell whether the new inode is a directory and hence where to + * locate it. * - * Once we successfully pick an inode its number is returned and the on-disk - * data structures are updated. The inode itself is not read in, since doing so - * would break ordering constraints with xfs_reclaim. + * This function will ensure that the selected AG has free inodes available to + * allocate from. The selected AGI will be returned locked to the caller, and it + * will allocate more free inodes if required. If no free inodes are found or + * can be allocated, no AGI will be returned. */ int -xfs_dialloc( - struct xfs_trans *tp, +xfs_dialloc_select_ag( + struct xfs_trans **tpp, xfs_ino_t parent, umode_t mode, - struct xfs_buf **IO_agbp, - xfs_ino_t *inop) + struct xfs_buf **IO_agbp) { - struct xfs_mount *mp = tp->t_mountp; + struct xfs_mount *mp = (*tpp)->t_mountp; struct xfs_buf *agbp; xfs_agnumber_t agno; int error; - int ialloced; - int noroom = 0; + bool noroom = false; xfs_agnumber_t start_agno; struct xfs_perag *pag; struct xfs_ino_geometry *igeo = M_IGEO(mp); - int okalloc = 1; + bool okalloc = true; - if (*IO_agbp) { - /* - * If the caller passes in a pointer to the AGI buffer, - * continue where we left off before. In this case, we - * know that the allocation group has free inodes. - */ - agbp = *IO_agbp; - goto out_alloc; - } + *IO_agbp = NULL; /* * We do not have an agbp, so select an initial allocation * group for inode allocation. */ - start_agno = xfs_ialloc_ag_select(tp, parent, mode); - if (start_agno == NULLAGNUMBER) { - *inop = NULLFSINO; + start_agno = xfs_ialloc_ag_select(*tpp, parent, mode); + if (start_agno == NULLAGNUMBER) return 0; - } /* * If we have already hit the ceiling of inode blocks then clear @@ -1753,8 +1764,8 @@ xfs_dialloc( if (igeo->maxicount && percpu_counter_read_positive(&mp->m_icount) + igeo->ialloc_inos > igeo->maxicount) { - noroom = 1; - okalloc = 0; + noroom = true; + okalloc = false; } /* @@ -1771,9 +1782,9 @@ xfs_dialloc( } if (!pag->pagi_init) { - error = xfs_ialloc_pagi_init(mp, tp, agno); + error = xfs_ialloc_pagi_init(mp, *tpp, agno); if (error) - goto out_error; + break; } /* @@ -1786,64 +1797,59 @@ xfs_dialloc( * Then read in the AGI buffer and recheck with the AGI buffer * lock held. */ - error = xfs_ialloc_read_agi(mp, tp, agno, &agbp); + error = xfs_ialloc_read_agi(mp, *tpp, agno, &agbp); if (error) - goto out_error; + break; if (pag->pagi_freecount) { xfs_perag_put(pag); - goto out_alloc; + goto found_ag; } if (!okalloc) goto nextag_relse_buffer; + error = xfs_ialloc_ag_alloc(*tpp, agbp); + if (error < 0) { + xfs_trans_brelse(*tpp, agbp); - error = xfs_ialloc_ag_alloc(tp, agbp, &ialloced); - if (error) { - xfs_trans_brelse(tp, agbp); - - if (error != -ENOSPC) - goto out_error; - - xfs_perag_put(pag); - *inop = NULLFSINO; - return 0; + if (error == -ENOSPC) + error = 0; + break; } - if (ialloced) { + if (error == 0) { /* - * We successfully allocated some inodes, return - * the current context to the caller so that it - * can commit the current transaction and call - * us again where we left off. + * We successfully allocated space for an inode cluster + * in this AG. Roll the transaction so that we can + * allocate one of the new inodes. */ ASSERT(pag->pagi_freecount > 0); xfs_perag_put(pag); - *IO_agbp = agbp; - *inop = NULLFSINO; - return 0; + error = xfs_dialloc_roll(tpp, agbp); + if (error) { + xfs_buf_relse(agbp); + return error; + } + goto found_ag; } nextag_relse_buffer: - xfs_trans_brelse(tp, agbp); + xfs_trans_brelse(*tpp, agbp); nextag: xfs_perag_put(pag); if (++agno == mp->m_sb.sb_agcount) agno = 0; - if (agno == start_agno) { - *inop = NULLFSINO; + if (agno == start_agno) return noroom ? -ENOSPC : 0; - } } -out_alloc: - *IO_agbp = NULL; - return xfs_dialloc_ag(tp, agbp, parent, inop); -out_error: xfs_perag_put(pag); return error; +found_ag: + *IO_agbp = agbp; + return 0; } /* @@ -2453,7 +2459,7 @@ out_map: void xfs_ialloc_log_agi( xfs_trans_t *tp, /* transaction pointer */ - xfs_buf_t *bp, /* allocation group header buffer */ + struct xfs_buf *bp, /* allocation group header buffer */ int fields) /* bitmask of fields to log */ { int first; /* first byte number */ @@ -2674,7 +2680,7 @@ xfs_ialloc_pagi_init( xfs_trans_t *tp, /* transaction pointer */ xfs_agnumber_t agno) /* allocation group number */ { - xfs_buf_t *bp = NULL; + struct xfs_buf *bp = NULL; int error; error = xfs_ialloc_read_agi(mp, tp, agno, &bp); diff --git a/fs/xfs/libxfs/xfs_ialloc.h b/fs/xfs/libxfs/xfs_ialloc.h index 72b3468b97b1..3511086a7ae1 100644 --- a/fs/xfs/libxfs/xfs_ialloc.h +++ b/fs/xfs/libxfs/xfs_ialloc.h @@ -37,30 +37,26 @@ xfs_make_iptr(struct xfs_mount *mp, struct xfs_buf *b, int o) * Mode is used to tell whether the new inode will need space, and whether * it is a directory. * - * To work within the constraint of one allocation per transaction, - * xfs_dialloc() is designed to be called twice if it has to do an - * allocation to make more free inodes. If an inode is - * available without an allocation, agbp would be set to the current - * agbp and alloc_done set to false. - * If an allocation needed to be done, agbp would be set to the - * inode header of the allocation group and alloc_done set to true. - * The caller should then commit the current transaction and allocate a new - * transaction. xfs_dialloc() should then be called again with - * the agbp value returned from the previous call. - * - * Once we successfully pick an inode its number is returned and the - * on-disk data structures are updated. The inode itself is not read - * in, since doing so would break ordering constraints with xfs_reclaim. - * - * *agbp should be set to NULL on the first call, *alloc_done set to FALSE. + * There are two phases to inode allocation: selecting an AG and ensuring + * that it contains free inodes, followed by allocating one of the free + * inodes. xfs_dialloc_select_ag() does the former and returns a locked AGI + * to the caller, ensuring that followup call to xfs_dialloc_ag() will + * have free inodes to allocate from. xfs_dialloc_ag() will return the inode + * number of the free inode we allocated. */ int /* error */ -xfs_dialloc( - struct xfs_trans *tp, /* transaction pointer */ +xfs_dialloc_select_ag( + struct xfs_trans **tpp, /* double pointer of transaction */ xfs_ino_t parent, /* parent inode (directory) */ umode_t mode, /* mode bits for new inode */ - struct xfs_buf **agbp, /* buf for a.g. inode header */ - xfs_ino_t *inop); /* inode number allocated */ + struct xfs_buf **IO_agbp); + +int +xfs_dialloc_ag( + struct xfs_trans *tp, + struct xfs_buf *agbp, + xfs_ino_t parent, + xfs_ino_t *inop); /* * Free disk inode. Carefully avoids touching the incore inode, all diff --git a/fs/xfs/libxfs/xfs_ialloc_btree.c b/fs/xfs/libxfs/xfs_ialloc_btree.c index cc919a2ee870..4c5831646bd9 100644 --- a/fs/xfs/libxfs/xfs_ialloc_btree.c +++ b/fs/xfs/libxfs/xfs_ialloc_btree.c @@ -672,11 +672,6 @@ xfs_inobt_cur( return error; cur = xfs_inobt_init_cursor(mp, tp, *agi_bpp, agno, which); - if (!cur) { - xfs_trans_brelse(tp, *agi_bpp); - *agi_bpp = NULL; - return -ENOMEM; - } *curpp = cur; return 0; } diff --git a/fs/xfs/libxfs/xfs_inode_buf.c b/fs/xfs/libxfs/xfs_inode_buf.c index c667c63f2cb0..4d7410e49db4 100644 --- a/fs/xfs/libxfs/xfs_inode_buf.c +++ b/fs/xfs/libxfs/xfs_inode_buf.c @@ -547,10 +547,6 @@ xfs_dinode_verify( if ((flags2 & XFS_DIFLAG2_REFLINK) && (flags & XFS_DIFLAG_REALTIME)) return __this_address; - /* don't let reflink and dax mix */ - if ((flags2 & XFS_DIFLAG2_REFLINK) && (flags2 & XFS_DIFLAG2_DAX)) - return __this_address; - /* COW extent size hint validation */ fa = xfs_inode_validate_cowextsize(mp, be32_to_cpu(dip->di_cowextsize), mode, flags, flags2); diff --git a/fs/xfs/libxfs/xfs_refcount.c b/fs/xfs/libxfs/xfs_refcount.c index 2076627243b0..2037b9f23069 100644 --- a/fs/xfs/libxfs/xfs_refcount.c +++ b/fs/xfs/libxfs/xfs_refcount.c @@ -1179,10 +1179,6 @@ xfs_refcount_finish_one( return error; rcur = xfs_refcountbt_init_cursor(mp, tp, agbp, agno); - if (!rcur) { - error = -ENOMEM; - goto out_cur; - } rcur->bc_ag.refc.nr_ops = nr_ops; rcur->bc_ag.refc.shape_changes = shape_changes; } @@ -1217,11 +1213,6 @@ xfs_refcount_finish_one( trace_xfs_refcount_finish_one_leftover(mp, agno, type, bno, blockcount, new_agbno, *new_len); return error; - -out_cur: - xfs_trans_brelse(tp, agbp); - - return error; } /* diff --git a/fs/xfs/libxfs/xfs_rmap.c b/fs/xfs/libxfs/xfs_rmap.c index 2668ebe02865..10e0cf9949a2 100644 --- a/fs/xfs/libxfs/xfs_rmap.c +++ b/fs/xfs/libxfs/xfs_rmap.c @@ -2404,10 +2404,6 @@ xfs_rmap_finish_one( return -EFSCORRUPTED; rcur = xfs_rmapbt_init_cursor(mp, tp, agbp, agno); - if (!rcur) { - error = -ENOMEM; - goto out_cur; - } } *pcur = rcur; @@ -2446,11 +2442,6 @@ xfs_rmap_finish_one( error = -EFSCORRUPTED; } return error; - -out_cur: - xfs_trans_brelse(tp, agbp); - - return error; } /* diff --git a/fs/xfs/libxfs/xfs_rtbitmap.c b/fs/xfs/libxfs/xfs_rtbitmap.c index 6c1aba16113c..fe3a49575ff3 100644 --- a/fs/xfs/libxfs/xfs_rtbitmap.c +++ b/fs/xfs/libxfs/xfs_rtbitmap.c @@ -56,9 +56,9 @@ xfs_rtbuf_get( xfs_trans_t *tp, /* transaction pointer */ xfs_rtblock_t block, /* block number in bitmap or summary */ int issum, /* is summary not bitmap */ - xfs_buf_t **bpp) /* output: buffer for the block */ + struct xfs_buf **bpp) /* output: buffer for the block */ { - xfs_buf_t *bp; /* block buffer, result */ + struct xfs_buf *bp; /* block buffer, result */ xfs_inode_t *ip; /* bitmap or summary inode */ xfs_bmbt_irec_t map; int nmap = 1; @@ -101,7 +101,7 @@ xfs_rtfind_back( xfs_rtword_t *b; /* current word in buffer */ int bit; /* bit number in the word */ xfs_rtblock_t block; /* bitmap block number */ - xfs_buf_t *bp; /* buf for the block */ + struct xfs_buf *bp; /* buf for the block */ xfs_rtword_t *bufp; /* starting word in buffer */ int error; /* error value */ xfs_rtblock_t firstbit; /* first useful bit in the word */ @@ -276,7 +276,7 @@ xfs_rtfind_forw( xfs_rtword_t *b; /* current word in buffer */ int bit; /* bit number in the word */ xfs_rtblock_t block; /* bitmap block number */ - xfs_buf_t *bp; /* buf for the block */ + struct xfs_buf *bp; /* buf for the block */ xfs_rtword_t *bufp; /* starting word in buffer */ int error; /* error value */ xfs_rtblock_t i; /* current bit number rel. to start */ @@ -447,11 +447,11 @@ xfs_rtmodify_summary_int( int log, /* log2 of extent size */ xfs_rtblock_t bbno, /* bitmap block number */ int delta, /* change to make to summary info */ - xfs_buf_t **rbpp, /* in/out: summary block buffer */ + struct xfs_buf **rbpp, /* in/out: summary block buffer */ xfs_fsblock_t *rsb, /* in/out: summary block number */ xfs_suminfo_t *sum) /* out: summary info for this block */ { - xfs_buf_t *bp; /* buffer for the summary block */ + struct xfs_buf *bp; /* buffer for the summary block */ int error; /* error value */ xfs_fsblock_t sb; /* summary fsblock */ int so; /* index into the summary file */ @@ -517,7 +517,7 @@ xfs_rtmodify_summary( int log, /* log2 of extent size */ xfs_rtblock_t bbno, /* bitmap block number */ int delta, /* change to make to summary info */ - xfs_buf_t **rbpp, /* in/out: summary block buffer */ + struct xfs_buf **rbpp, /* in/out: summary block buffer */ xfs_fsblock_t *rsb) /* in/out: summary block number */ { return xfs_rtmodify_summary_int(mp, tp, log, bbno, @@ -539,7 +539,7 @@ xfs_rtmodify_range( xfs_rtword_t *b; /* current word in buffer */ int bit; /* bit number in the word */ xfs_rtblock_t block; /* bitmap block number */ - xfs_buf_t *bp; /* buf for the block */ + struct xfs_buf *bp; /* buf for the block */ xfs_rtword_t *bufp; /* starting word in buffer */ int error; /* error value */ xfs_rtword_t *first; /* first used word in the buffer */ @@ -690,7 +690,7 @@ xfs_rtfree_range( xfs_trans_t *tp, /* transaction pointer */ xfs_rtblock_t start, /* starting block to free */ xfs_extlen_t len, /* length to free */ - xfs_buf_t **rbpp, /* in/out: summary block buffer */ + struct xfs_buf **rbpp, /* in/out: summary block buffer */ xfs_fsblock_t *rsb) /* in/out: summary block number */ { xfs_rtblock_t end; /* end of the freed extent */ @@ -773,7 +773,7 @@ xfs_rtcheck_range( xfs_rtword_t *b; /* current word in buffer */ int bit; /* bit number in the word */ xfs_rtblock_t block; /* bitmap block number */ - xfs_buf_t *bp; /* buf for the block */ + struct xfs_buf *bp; /* buf for the block */ xfs_rtword_t *bufp; /* starting word in buffer */ int error; /* error value */ xfs_rtblock_t i; /* current bit number rel. to start */ @@ -969,7 +969,7 @@ xfs_rtfree_extent( int error; /* error value */ xfs_mount_t *mp; /* file system mount structure */ xfs_fsblock_t sb; /* summary file block number */ - xfs_buf_t *sumbp = NULL; /* summary file block buffer */ + struct xfs_buf *sumbp = NULL; /* summary file block buffer */ mp = tp->t_mountp; diff --git a/fs/xfs/libxfs/xfs_sb.c b/fs/xfs/libxfs/xfs_sb.c index 5aeafa59ed27..bbda117e5d85 100644 --- a/fs/xfs/libxfs/xfs_sb.c +++ b/fs/xfs/libxfs/xfs_sb.c @@ -360,21 +360,18 @@ xfs_validate_sb_common( } } - if (sbp->sb_unit) { - if (!xfs_sb_version_hasdalign(sbp) || - sbp->sb_unit > sbp->sb_width || - (sbp->sb_width % sbp->sb_unit) != 0) { - xfs_notice(mp, "SB stripe unit sanity check failed"); - return -EFSCORRUPTED; - } - } else if (xfs_sb_version_hasdalign(sbp)) { + /* + * Either (sb_unit and !hasdalign) or (!sb_unit and hasdalign) + * would imply the image is corrupted. + */ + if (!!sbp->sb_unit ^ xfs_sb_version_hasdalign(sbp)) { xfs_notice(mp, "SB stripe alignment sanity check failed"); return -EFSCORRUPTED; - } else if (sbp->sb_width) { - xfs_notice(mp, "SB stripe width sanity check failed"); - return -EFSCORRUPTED; } + if (!xfs_validate_stripe_geometry(mp, XFS_FSB_TO_B(mp, sbp->sb_unit), + XFS_FSB_TO_B(mp, sbp->sb_width), 0, false)) + return -EFSCORRUPTED; if (xfs_sb_version_hascrc(&mp->m_sb) && sbp->sb_blocksize < XFS_MIN_CRC_BLOCKSIZE) { @@ -383,17 +380,6 @@ xfs_validate_sb_common( } /* - * Until this is fixed only page-sized or smaller data blocks work. - */ - if (unlikely(sbp->sb_blocksize > PAGE_SIZE)) { - xfs_warn(mp, - "File system with blocksize %d bytes. " - "Only pagesize (%ld) or less will currently work.", - sbp->sb_blocksize, PAGE_SIZE); - return -ENOSYS; - } - - /* * Currently only very few inode sizes are supported. */ switch (sbp->sb_inodesize) { @@ -408,22 +394,6 @@ xfs_validate_sb_common( return -ENOSYS; } - if (xfs_sb_validate_fsb_count(sbp, sbp->sb_dblocks) || - xfs_sb_validate_fsb_count(sbp, sbp->sb_rblocks)) { - xfs_warn(mp, - "file system too large to be mounted on this system."); - return -EFBIG; - } - - /* - * Don't touch the filesystem if a user tool thinks it owns the primary - * superblock. mkfs doesn't clear the flag from secondary supers, so - * we don't check them at all. - */ - if (XFS_BUF_ADDR(bp) == XFS_SB_DADDR && sbp->sb_inprogress) { - xfs_warn(mp, "Offline file system operation in progress!"); - return -EFSCORRUPTED; - } return 0; } @@ -1233,3 +1203,61 @@ xfs_sb_get_secondary( *bpp = bp; return 0; } + +/* + * sunit, swidth, sectorsize(optional with 0) should be all in bytes, + * so users won't be confused by values in error messages. + */ +bool +xfs_validate_stripe_geometry( + struct xfs_mount *mp, + __s64 sunit, + __s64 swidth, + int sectorsize, + bool silent) +{ + if (swidth > INT_MAX) { + if (!silent) + xfs_notice(mp, +"stripe width (%lld) is too large", swidth); + return false; + } + + if (sunit > swidth) { + if (!silent) + xfs_notice(mp, +"stripe unit (%lld) is larger than the stripe width (%lld)", sunit, swidth); + return false; + } + + if (sectorsize && (int)sunit % sectorsize) { + if (!silent) + xfs_notice(mp, +"stripe unit (%lld) must be a multiple of the sector size (%d)", + sunit, sectorsize); + return false; + } + + if (sunit && !swidth) { + if (!silent) + xfs_notice(mp, +"invalid stripe unit (%lld) and stripe width of 0", sunit); + return false; + } + + if (!sunit && swidth) { + if (!silent) + xfs_notice(mp, +"invalid stripe width (%lld) and stripe unit of 0", swidth); + return false; + } + + if (sunit && (int)swidth % (int)sunit) { + if (!silent) + xfs_notice(mp, +"stripe width (%lld) must be a multiple of the stripe unit (%lld)", + swidth, sunit); + return false; + } + return true; +} diff --git a/fs/xfs/libxfs/xfs_sb.h b/fs/xfs/libxfs/xfs_sb.h index 92465a9a5162..f79f9dc632b6 100644 --- a/fs/xfs/libxfs/xfs_sb.h +++ b/fs/xfs/libxfs/xfs_sb.h @@ -42,4 +42,7 @@ extern int xfs_sb_get_secondary(struct xfs_mount *mp, struct xfs_trans *tp, xfs_agnumber_t agno, struct xfs_buf **bpp); +extern bool xfs_validate_stripe_geometry(struct xfs_mount *mp, + __s64 sunit, __s64 swidth, int sectorsize, bool silent); + #endif /* __XFS_SB_H__ */ diff --git a/fs/xfs/libxfs/xfs_shared.h b/fs/xfs/libxfs/xfs_shared.h index c795ae47b3c9..8c61a461bf7b 100644 --- a/fs/xfs/libxfs/xfs_shared.h +++ b/fs/xfs/libxfs/xfs_shared.h @@ -62,7 +62,6 @@ void xfs_log_get_max_trans_res(struct xfs_mount *mp, #define XFS_TRANS_SB_DIRTY 0x02 /* superblock is modified */ #define XFS_TRANS_PERM_LOG_RES 0x04 /* xact took a permanent log res */ #define XFS_TRANS_SYNC 0x08 /* make commit synchronous */ -#define XFS_TRANS_DQ_DIRTY 0x10 /* at least one dquot in trx dirty */ #define XFS_TRANS_RESERVE 0x20 /* OK to use reserved data blocks */ #define XFS_TRANS_NO_WRITECOUNT 0x40 /* do not elevate SB writecount */ #define XFS_TRANS_RES_FDBLKS 0x80 /* reserve newly freed blocks */ diff --git a/fs/xfs/libxfs/xfs_types.c b/fs/xfs/libxfs/xfs_types.c index 4f595546a639..b254fbeaaa50 100644 --- a/fs/xfs/libxfs/xfs_types.c +++ b/fs/xfs/libxfs/xfs_types.c @@ -61,6 +61,29 @@ xfs_verify_fsbno( return xfs_verify_agbno(mp, agno, XFS_FSB_TO_AGBNO(mp, fsbno)); } +/* + * Verify that a data device extent is fully contained inside the filesystem, + * does not cross an AG boundary, and does not point at static metadata. + */ +bool +xfs_verify_fsbext( + struct xfs_mount *mp, + xfs_fsblock_t fsbno, + xfs_fsblock_t len) +{ + if (fsbno + len <= fsbno) + return false; + + if (!xfs_verify_fsbno(mp, fsbno)) + return false; + + if (!xfs_verify_fsbno(mp, fsbno + len - 1)) + return false; + + return XFS_FSB_TO_AGNO(mp, fsbno) == + XFS_FSB_TO_AGNO(mp, fsbno + len - 1); +} + /* Calculate the first and last possible inode number in an AG. */ void xfs_agino_range( @@ -175,6 +198,22 @@ xfs_verify_rtbno( return rtbno < mp->m_sb.sb_rblocks; } +/* Verify that a realtime device extent is fully contained inside the volume. */ +bool +xfs_verify_rtext( + struct xfs_mount *mp, + xfs_rtblock_t rtbno, + xfs_rtblock_t len) +{ + if (rtbno + len <= rtbno) + return false; + + if (!xfs_verify_rtbno(mp, rtbno)) + return false; + + return xfs_verify_rtbno(mp, rtbno + len - 1); +} + /* Calculate the range of valid icount values. */ void xfs_icount_range( @@ -219,3 +258,28 @@ xfs_verify_dablk( return dabno <= max_dablk; } + +/* Check that a file block offset does not exceed the maximum. */ +bool +xfs_verify_fileoff( + struct xfs_mount *mp, + xfs_fileoff_t off) +{ + return off <= XFS_MAX_FILEOFF; +} + +/* Check that a range of file block offsets do not exceed the maximum. */ +bool +xfs_verify_fileext( + struct xfs_mount *mp, + xfs_fileoff_t off, + xfs_fileoff_t len) +{ + if (off + len <= off) + return false; + + if (!xfs_verify_fileoff(mp, off)) + return false; + + return xfs_verify_fileoff(mp, off + len - 1); +} diff --git a/fs/xfs/libxfs/xfs_types.h b/fs/xfs/libxfs/xfs_types.h index 397d94775440..064bd6e8c922 100644 --- a/fs/xfs/libxfs/xfs_types.h +++ b/fs/xfs/libxfs/xfs_types.h @@ -184,6 +184,8 @@ xfs_agblock_t xfs_ag_block_count(struct xfs_mount *mp, xfs_agnumber_t agno); bool xfs_verify_agbno(struct xfs_mount *mp, xfs_agnumber_t agno, xfs_agblock_t agbno); bool xfs_verify_fsbno(struct xfs_mount *mp, xfs_fsblock_t fsbno); +bool xfs_verify_fsbext(struct xfs_mount *mp, xfs_fsblock_t fsbno, + xfs_fsblock_t len); void xfs_agino_range(struct xfs_mount *mp, xfs_agnumber_t agno, xfs_agino_t *first, xfs_agino_t *last); @@ -195,9 +197,14 @@ bool xfs_verify_ino(struct xfs_mount *mp, xfs_ino_t ino); bool xfs_internal_inum(struct xfs_mount *mp, xfs_ino_t ino); bool xfs_verify_dir_ino(struct xfs_mount *mp, xfs_ino_t ino); bool xfs_verify_rtbno(struct xfs_mount *mp, xfs_rtblock_t rtbno); +bool xfs_verify_rtext(struct xfs_mount *mp, xfs_rtblock_t rtbno, + xfs_rtblock_t len); bool xfs_verify_icount(struct xfs_mount *mp, unsigned long long icount); bool xfs_verify_dablk(struct xfs_mount *mp, xfs_fileoff_t off); void xfs_icount_range(struct xfs_mount *mp, unsigned long long *min, unsigned long long *max); +bool xfs_verify_fileoff(struct xfs_mount *mp, xfs_fileoff_t off); +bool xfs_verify_fileext(struct xfs_mount *mp, xfs_fileoff_t off, + xfs_fileoff_t len); #endif /* __XFS_TYPES_H__ */ diff --git a/fs/xfs/scrub/agheader_repair.c b/fs/xfs/scrub/agheader_repair.c index 401f71579ce6..23690f824ffa 100644 --- a/fs/xfs/scrub/agheader_repair.c +++ b/fs/xfs/scrub/agheader_repair.c @@ -829,8 +829,6 @@ xrep_agi_calc_from_btrees( cur = xfs_inobt_init_cursor(mp, sc->tp, agi_bp, sc->sa.agno, XFS_BTNUM_FINO); - if (error) - goto err; error = xfs_btree_count_blocks(cur, &blocks); if (error) goto err; diff --git a/fs/xfs/scrub/bmap.c b/fs/xfs/scrub/bmap.c index fed56d213a3f..33559c3a4bc3 100644 --- a/fs/xfs/scrub/bmap.c +++ b/fs/xfs/scrub/bmap.c @@ -319,7 +319,6 @@ xchk_bmap_iextent( struct xfs_bmbt_irec *irec) { struct xfs_mount *mp = info->sc->mp; - xfs_filblks_t end; int error = 0; /* @@ -330,6 +329,10 @@ xchk_bmap_iextent( xchk_fblock_set_corrupt(info->sc, info->whichfork, irec->br_startoff); + if (!xfs_verify_fileext(mp, irec->br_startoff, irec->br_blockcount)) + xchk_fblock_set_corrupt(info->sc, info->whichfork, + irec->br_startoff); + xchk_bmap_dirattr_extent(ip, info, irec); /* There should never be a "hole" extent in either extent list. */ @@ -349,20 +352,12 @@ xchk_bmap_iextent( if (irec->br_blockcount > MAXEXTLEN) xchk_fblock_set_corrupt(info->sc, info->whichfork, irec->br_startoff); - if (irec->br_startblock + irec->br_blockcount <= irec->br_startblock) - xchk_fblock_set_corrupt(info->sc, info->whichfork, - irec->br_startoff); - end = irec->br_startblock + irec->br_blockcount - 1; if (info->is_rt && - (!xfs_verify_rtbno(mp, irec->br_startblock) || - !xfs_verify_rtbno(mp, end))) + !xfs_verify_rtext(mp, irec->br_startblock, irec->br_blockcount)) xchk_fblock_set_corrupt(info->sc, info->whichfork, irec->br_startoff); if (!info->is_rt && - (!xfs_verify_fsbno(mp, irec->br_startblock) || - !xfs_verify_fsbno(mp, end) || - XFS_FSB_TO_AGNO(mp, irec->br_startblock) != - XFS_FSB_TO_AGNO(mp, end))) + !xfs_verify_fsbext(mp, irec->br_startblock, irec->br_blockcount)) xchk_fblock_set_corrupt(info->sc, info->whichfork, irec->br_startoff); @@ -563,10 +558,6 @@ xchk_bmap_check_ag_rmaps( return error; cur = xfs_rmapbt_init_cursor(sc->mp, sc->tp, agf, agno); - if (!cur) { - error = -ENOMEM; - goto out_agf; - } sbcri.sc = sc; sbcri.whichfork = whichfork; @@ -575,7 +566,6 @@ xchk_bmap_check_ag_rmaps( error = 0; xfs_btree_del_cursor(cur, error); -out_agf: xfs_trans_brelse(sc->tp, agf); return error; } diff --git a/fs/xfs/scrub/common.c b/fs/xfs/scrub/common.c index 18876056e5e0..8ea6d4aa3f55 100644 --- a/fs/xfs/scrub/common.c +++ b/fs/xfs/scrub/common.c @@ -466,8 +466,6 @@ xchk_ag_btcur_init( /* Set up a bnobt cursor for cross-referencing. */ sa->bno_cur = xfs_allocbt_init_cursor(mp, sc->tp, sa->agf_bp, agno, XFS_BTNUM_BNO); - if (!sa->bno_cur) - goto err; } if (sa->agf_bp && @@ -475,8 +473,6 @@ xchk_ag_btcur_init( /* Set up a cntbt cursor for cross-referencing. */ sa->cnt_cur = xfs_allocbt_init_cursor(mp, sc->tp, sa->agf_bp, agno, XFS_BTNUM_CNT); - if (!sa->cnt_cur) - goto err; } /* Set up a inobt cursor for cross-referencing. */ @@ -484,8 +480,6 @@ xchk_ag_btcur_init( xchk_ag_btree_healthy_enough(sc, sa->pag, XFS_BTNUM_INO)) { sa->ino_cur = xfs_inobt_init_cursor(mp, sc->tp, sa->agi_bp, agno, XFS_BTNUM_INO); - if (!sa->ino_cur) - goto err; } /* Set up a finobt cursor for cross-referencing. */ @@ -493,8 +487,6 @@ xchk_ag_btcur_init( xchk_ag_btree_healthy_enough(sc, sa->pag, XFS_BTNUM_FINO)) { sa->fino_cur = xfs_inobt_init_cursor(mp, sc->tp, sa->agi_bp, agno, XFS_BTNUM_FINO); - if (!sa->fino_cur) - goto err; } /* Set up a rmapbt cursor for cross-referencing. */ @@ -502,8 +494,6 @@ xchk_ag_btcur_init( xchk_ag_btree_healthy_enough(sc, sa->pag, XFS_BTNUM_RMAP)) { sa->rmap_cur = xfs_rmapbt_init_cursor(mp, sc->tp, sa->agf_bp, agno); - if (!sa->rmap_cur) - goto err; } /* Set up a refcountbt cursor for cross-referencing. */ @@ -511,13 +501,9 @@ xchk_ag_btcur_init( xchk_ag_btree_healthy_enough(sc, sa->pag, XFS_BTNUM_REFC)) { sa->refc_cur = xfs_refcountbt_init_cursor(mp, sc->tp, sa->agf_bp, agno); - if (!sa->refc_cur) - goto err; } return 0; -err: - return -ENOMEM; } /* Release the AG header context and btree cursors. */ diff --git a/fs/xfs/scrub/dir.c b/fs/xfs/scrub/dir.c index b045e95c2ea7..178b3455a170 100644 --- a/fs/xfs/scrub/dir.c +++ b/fs/xfs/scrub/dir.c @@ -66,8 +66,18 @@ xchk_dir_check_ftype( * eofblocks cleanup (which allocates what would be a nested * transaction), we can't use DONTCACHE here because DONTCACHE * inodes can trigger immediate inactive cleanup of the inode. + * + * If _iget returns -EINVAL or -ENOENT then the child inode number is + * garbage and the directory is corrupt. If the _iget returns + * -EFSCORRUPTED or -EFSBADCRC then the child is corrupt which is a + * cross referencing error. Any other error is an operational error. */ error = xfs_iget(mp, sdc->sc->tp, inum, 0, 0, &ip); + if (error == -EINVAL || error == -ENOENT) { + error = -EFSCORRUPTED; + xchk_fblock_process_error(sdc->sc, XFS_DATA_FORK, 0, &error); + goto out; + } if (!xchk_fblock_xref_process_error(sdc->sc, XFS_DATA_FORK, offset, &error)) goto out; @@ -105,6 +115,7 @@ xchk_dir_actor( struct xfs_name xname; xfs_ino_t lookup_ino; xfs_dablk_t offset; + bool checked_ftype = false; int error = 0; sdc = container_of(dir_iter, struct xchk_dir_ctx, dir_iter); @@ -133,6 +144,7 @@ xchk_dir_actor( if (xfs_sb_version_hasftype(&mp->m_sb) && type != DT_DIR) xchk_fblock_set_corrupt(sdc->sc, XFS_DATA_FORK, offset); + checked_ftype = true; if (ino != ip->i_ino) xchk_fblock_set_corrupt(sdc->sc, XFS_DATA_FORK, offset); @@ -144,6 +156,7 @@ xchk_dir_actor( if (xfs_sb_version_hasftype(&mp->m_sb) && type != DT_DIR) xchk_fblock_set_corrupt(sdc->sc, XFS_DATA_FORK, offset); + checked_ftype = true; if (ip->i_ino == mp->m_sb.sb_rootino && ino != ip->i_ino) xchk_fblock_set_corrupt(sdc->sc, XFS_DATA_FORK, offset); @@ -167,9 +180,11 @@ xchk_dir_actor( } /* Verify the file type. This function absorbs error codes. */ - error = xchk_dir_check_ftype(sdc, offset, lookup_ino, type); - if (error) - goto out; + if (!checked_ftype) { + error = xchk_dir_check_ftype(sdc, offset, lookup_ino, type); + if (error) + goto out; + } out: /* * A negative error code returned here is supposed to cause the diff --git a/fs/xfs/scrub/inode.c b/fs/xfs/scrub/inode.c index bb25ff1b770d..faf65eb5bd31 100644 --- a/fs/xfs/scrub/inode.c +++ b/fs/xfs/scrub/inode.c @@ -185,10 +185,6 @@ xchk_inode_flags2( if ((flags & XFS_DIFLAG_REALTIME) && (flags2 & XFS_DIFLAG2_REFLINK)) goto bad; - /* dax and reflink make no sense, currently */ - if ((flags2 & XFS_DIFLAG2_DAX) && (flags2 & XFS_DIFLAG2_REFLINK)) - goto bad; - /* no bigtime iflag without the bigtime feature */ if (xfs_dinode_has_bigtime(dip) && !xfs_sb_version_hasbigtime(&mp->m_sb)) diff --git a/fs/xfs/scrub/parent.c b/fs/xfs/scrub/parent.c index 855aa8bcab64..66c35f6dfc24 100644 --- a/fs/xfs/scrub/parent.c +++ b/fs/xfs/scrub/parent.c @@ -164,13 +164,13 @@ xchk_parent_validate( * can't use DONTCACHE here because DONTCACHE inodes can trigger * immediate inactive cleanup of the inode. * - * If _iget returns -EINVAL then the parent inode number is garbage - * and the directory is corrupt. If the _iget returns -EFSCORRUPTED - * or -EFSBADCRC then the parent is corrupt which is a cross - * referencing error. Any other error is an operational error. + * If _iget returns -EINVAL or -ENOENT then the parent inode number is + * garbage and the directory is corrupt. If the _iget returns + * -EFSCORRUPTED or -EFSBADCRC then the parent is corrupt which is a + * cross referencing error. Any other error is an operational error. */ error = xfs_iget(mp, sc->tp, dnum, XFS_IGET_UNTRUSTED, 0, &dp); - if (error == -EINVAL) { + if (error == -EINVAL || error == -ENOENT) { error = -EFSCORRUPTED; xchk_fblock_process_error(sc, XFS_DATA_FORK, 0, &error); goto out; diff --git a/fs/xfs/scrub/rtbitmap.c b/fs/xfs/scrub/rtbitmap.c index 76e4ffe0315b..d409ca592178 100644 --- a/fs/xfs/scrub/rtbitmap.c +++ b/fs/xfs/scrub/rtbitmap.c @@ -52,9 +52,7 @@ xchk_rtbitmap_rec( startblock = rec->ar_startext * tp->t_mountp->m_sb.sb_rextsize; blockcount = rec->ar_extcount * tp->t_mountp->m_sb.sb_rextsize; - if (startblock + blockcount <= startblock || - !xfs_verify_rtbno(sc->mp, startblock) || - !xfs_verify_rtbno(sc->mp, startblock + blockcount - 1)) + if (!xfs_verify_rtext(sc->mp, startblock, blockcount)) xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, 0); return 0; } diff --git a/fs/xfs/xfs_acl.c b/fs/xfs/xfs_acl.c index c544951a0c07..779cb73b3d00 100644 --- a/fs/xfs/xfs_acl.c +++ b/fs/xfs/xfs_acl.c @@ -16,6 +16,7 @@ #include "xfs_acl.h" #include "xfs_da_format.h" #include "xfs_da_btree.h" +#include "xfs_trans.h" #include <linux/posix_acl_xattr.h> @@ -212,21 +213,28 @@ __xfs_set_acl(struct inode *inode, struct posix_acl *acl, int type) } static int -xfs_set_mode(struct inode *inode, umode_t mode) +xfs_acl_set_mode( + struct inode *inode, + umode_t mode) { - int error = 0; - - if (mode != inode->i_mode) { - struct iattr iattr; + struct xfs_inode *ip = XFS_I(inode); + struct xfs_mount *mp = ip->i_mount; + struct xfs_trans *tp; + int error; - iattr.ia_valid = ATTR_MODE | ATTR_CTIME; - iattr.ia_mode = mode; - iattr.ia_ctime = current_time(inode); + error = xfs_trans_alloc(mp, &M_RES(mp)->tr_ichange, 0, 0, 0, &tp); + if (error) + return error; - error = xfs_setattr_nonsize(XFS_I(inode), &iattr, XFS_ATTR_NOACL); - } + xfs_ilock(ip, XFS_ILOCK_EXCL); + xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); + inode->i_mode = mode; + inode->i_ctime = current_time(inode); + xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); - return error; + if (mp->m_flags & XFS_MOUNT_WSYNC) + xfs_trans_set_sync(tp); + return xfs_trans_commit(tp); } int @@ -251,18 +259,14 @@ xfs_set_acl(struct inode *inode, struct posix_acl *acl, int type) } set_acl: - error = __xfs_set_acl(inode, acl, type); - if (error) - return error; - /* * We set the mode after successfully updating the ACL xattr because the * xattr update can fail at ENOSPC and we don't want to change the mode * if the ACL update hasn't been applied. */ - if (set_mode) - error = xfs_set_mode(inode, mode); - + error = __xfs_set_acl(inode, acl, type); + if (!error && set_mode && mode != inode->i_mode) + error = xfs_acl_set_mode(inode, mode); return error; } diff --git a/fs/xfs/xfs_bmap_item.c b/fs/xfs/xfs_bmap_item.c index 9e16a4d0f97c..93e4d8ae6e92 100644 --- a/fs/xfs/xfs_bmap_item.c +++ b/fs/xfs/xfs_bmap_item.c @@ -417,6 +417,40 @@ const struct xfs_defer_op_type xfs_bmap_update_defer_type = { .cancel_item = xfs_bmap_update_cancel_item, }; +/* Is this recovered BUI ok? */ +static inline bool +xfs_bui_validate( + struct xfs_mount *mp, + struct xfs_bui_log_item *buip) +{ + struct xfs_map_extent *bmap; + + /* Only one mapping operation per BUI... */ + if (buip->bui_format.bui_nextents != XFS_BUI_MAX_FAST_EXTENTS) + return false; + + bmap = &buip->bui_format.bui_extents[0]; + + if (bmap->me_flags & ~XFS_BMAP_EXTENT_FLAGS) + return false; + + switch (bmap->me_flags & XFS_BMAP_EXTENT_TYPE_MASK) { + case XFS_BMAP_MAP: + case XFS_BMAP_UNMAP: + break; + default: + return false; + } + + if (!xfs_verify_ino(mp, bmap->me_owner)) + return false; + + if (!xfs_verify_fileext(mp, bmap->me_startoff, bmap->me_len)) + return false; + + return xfs_verify_fsbext(mp, bmap->me_startblock, bmap->me_len); +} + /* * Process a bmap update intent item that was recovered from the log. * We need to update some inode's bmbt. @@ -433,47 +467,24 @@ xfs_bui_item_recover( struct xfs_mount *mp = lip->li_mountp; struct xfs_map_extent *bmap; struct xfs_bud_log_item *budp; - xfs_fsblock_t startblock_fsb; - xfs_fsblock_t inode_fsb; xfs_filblks_t count; xfs_exntst_t state; unsigned int bui_type; int whichfork; int error = 0; - /* Only one mapping operation per BUI... */ - if (buip->bui_format.bui_nextents != XFS_BUI_MAX_FAST_EXTENTS) + if (!xfs_bui_validate(mp, buip)) { + XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, + &buip->bui_format, sizeof(buip->bui_format)); return -EFSCORRUPTED; + } - /* - * First check the validity of the extent described by the - * BUI. If anything is bad, then toss the BUI. - */ bmap = &buip->bui_format.bui_extents[0]; - startblock_fsb = XFS_BB_TO_FSB(mp, - XFS_FSB_TO_DADDR(mp, bmap->me_startblock)); - inode_fsb = XFS_BB_TO_FSB(mp, XFS_FSB_TO_DADDR(mp, - XFS_INO_TO_FSB(mp, bmap->me_owner))); state = (bmap->me_flags & XFS_BMAP_EXTENT_UNWRITTEN) ? XFS_EXT_UNWRITTEN : XFS_EXT_NORM; whichfork = (bmap->me_flags & XFS_BMAP_EXTENT_ATTR_FORK) ? XFS_ATTR_FORK : XFS_DATA_FORK; bui_type = bmap->me_flags & XFS_BMAP_EXTENT_TYPE_MASK; - switch (bui_type) { - case XFS_BMAP_MAP: - case XFS_BMAP_UNMAP: - break; - default: - return -EFSCORRUPTED; - } - if (startblock_fsb == 0 || - bmap->me_len == 0 || - inode_fsb == 0 || - startblock_fsb >= mp->m_sb.sb_dblocks || - bmap->me_len >= mp->m_sb.sb_agblocks || - inode_fsb >= mp->m_sb.sb_dblocks || - (bmap->me_flags & ~XFS_BMAP_EXTENT_FLAGS)) - return -EFSCORRUPTED; /* Grab the inode. */ error = xfs_iget(mp, NULL, bmap->me_owner, 0, 0, &ip); diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c index 4e4cf91f4f9f..f8400bbd6473 100644 --- a/fs/xfs/xfs_buf.c +++ b/fs/xfs/xfs_buf.c @@ -278,7 +278,7 @@ _xfs_buf_alloc( */ STATIC int _xfs_buf_get_pages( - xfs_buf_t *bp, + struct xfs_buf *bp, int page_count) { /* Make sure that we have a page list */ @@ -302,7 +302,7 @@ _xfs_buf_get_pages( */ STATIC void _xfs_buf_free_pages( - xfs_buf_t *bp) + struct xfs_buf *bp) { if (bp->b_pages != bp->b_page_array) { kmem_free(bp->b_pages); @@ -319,7 +319,7 @@ _xfs_buf_free_pages( */ static void xfs_buf_free( - xfs_buf_t *bp) + struct xfs_buf *bp) { trace_xfs_buf_free(bp, _RET_IP_); @@ -352,7 +352,7 @@ xfs_buf_free( */ STATIC int xfs_buf_allocate_memory( - xfs_buf_t *bp, + struct xfs_buf *bp, uint flags) { size_t size; @@ -463,7 +463,7 @@ out_free_pages: */ STATIC int _xfs_buf_map_pages( - xfs_buf_t *bp, + struct xfs_buf *bp, uint flags) { ASSERT(bp->b_flags & _XBF_PAGES); @@ -590,7 +590,7 @@ xfs_buf_find( struct xfs_buf **found_bp) { struct xfs_perag *pag; - xfs_buf_t *bp; + struct xfs_buf *bp; struct xfs_buf_map cmap = { .bm_bn = map[0].bm_bn }; xfs_daddr_t eofs; int i; @@ -762,7 +762,7 @@ found: int _xfs_buf_read( - xfs_buf_t *bp, + struct xfs_buf *bp, xfs_buf_flags_t flags) { ASSERT(!(flags & XBF_WRITE)); @@ -1005,7 +1005,7 @@ xfs_buf_get_uncached( */ void xfs_buf_hold( - xfs_buf_t *bp) + struct xfs_buf *bp) { trace_xfs_buf_hold(bp, _RET_IP_); atomic_inc(&bp->b_hold); @@ -1017,7 +1017,7 @@ xfs_buf_hold( */ void xfs_buf_rele( - xfs_buf_t *bp) + struct xfs_buf *bp) { struct xfs_perag *pag = bp->b_pag; bool release; @@ -1161,7 +1161,7 @@ xfs_buf_unlock( STATIC void xfs_buf_wait_unpin( - xfs_buf_t *bp) + struct xfs_buf *bp) { DECLARE_WAITQUEUE (wait, current); @@ -1373,7 +1373,7 @@ xfs_buf_ioend_work( struct work_struct *work) { struct xfs_buf *bp = - container_of(work, xfs_buf_t, b_ioend_work); + container_of(work, struct xfs_buf, b_ioend_work); xfs_buf_ioend(bp); } @@ -1388,7 +1388,7 @@ xfs_buf_ioend_async( void __xfs_buf_ioerror( - xfs_buf_t *bp, + struct xfs_buf *bp, int error, xfs_failaddr_t failaddr) { diff --git a/fs/xfs/xfs_buf.h b/fs/xfs/xfs_buf.h index bfd2907e7bc4..5d91a31298a4 100644 --- a/fs/xfs/xfs_buf.h +++ b/fs/xfs/xfs_buf.h @@ -124,7 +124,7 @@ struct xfs_buf_ops { xfs_failaddr_t (*verify_struct)(struct xfs_buf *bp); }; -typedef struct xfs_buf { +struct xfs_buf { /* * first cacheline holds all the fields needed for an uncontended cache * hit to be fully processed. The semaphore straddles the cacheline @@ -190,7 +190,7 @@ typedef struct xfs_buf { int b_last_error; const struct xfs_buf_ops *b_ops; -} xfs_buf_t; +}; /* Finding and Reading Buffers */ struct xfs_buf *xfs_buf_incore(struct xfs_buftarg *target, @@ -253,16 +253,16 @@ int _xfs_buf_read(struct xfs_buf *bp, xfs_buf_flags_t flags); void xfs_buf_hold(struct xfs_buf *bp); /* Releasing Buffers */ -extern void xfs_buf_rele(xfs_buf_t *); +extern void xfs_buf_rele(struct xfs_buf *); /* Locking and Unlocking Buffers */ -extern int xfs_buf_trylock(xfs_buf_t *); -extern void xfs_buf_lock(xfs_buf_t *); -extern void xfs_buf_unlock(xfs_buf_t *); +extern int xfs_buf_trylock(struct xfs_buf *); +extern void xfs_buf_lock(struct xfs_buf *); +extern void xfs_buf_unlock(struct xfs_buf *); #define xfs_buf_islocked(bp) \ ((bp)->b_sema.count <= 0) -static inline void xfs_buf_relse(xfs_buf_t *bp) +static inline void xfs_buf_relse(struct xfs_buf *bp) { xfs_buf_unlock(bp); xfs_buf_rele(bp); diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c index 0356f2e340a1..dc0be2a639cc 100644 --- a/fs/xfs/xfs_buf_item.c +++ b/fs/xfs/xfs_buf_item.c @@ -412,7 +412,7 @@ xfs_buf_item_unpin( int remove) { struct xfs_buf_log_item *bip = BUF_ITEM(lip); - xfs_buf_t *bp = bip->bli_buf; + struct xfs_buf *bp = bip->bli_buf; int stale = bip->bli_flags & XFS_BLI_STALE; int freed; @@ -942,7 +942,7 @@ xfs_buf_item_free( */ void xfs_buf_item_relse( - xfs_buf_t *bp) + struct xfs_buf *bp) { struct xfs_buf_log_item *bip = bp->b_log_item; diff --git a/fs/xfs/xfs_extfree_item.c b/fs/xfs/xfs_extfree_item.c index 6c11bfc3d452..93223ebb3372 100644 --- a/fs/xfs/xfs_extfree_item.c +++ b/fs/xfs/xfs_extfree_item.c @@ -578,6 +578,15 @@ const struct xfs_defer_op_type xfs_agfl_free_defer_type = { .cancel_item = xfs_extent_free_cancel_item, }; +/* Is this recovered EFI ok? */ +static inline bool +xfs_efi_validate_ext( + struct xfs_mount *mp, + struct xfs_extent *extp) +{ + return xfs_verify_fsbext(mp, extp->ext_start, extp->ext_len); +} + /* * Process an extent free intent item that was recovered from * the log. We need to free the extents that it describes. @@ -592,7 +601,6 @@ xfs_efi_item_recover( struct xfs_efd_log_item *efdp; struct xfs_trans *tp; struct xfs_extent *extp; - xfs_fsblock_t startblock_fsb; int i; int error = 0; @@ -602,14 +610,13 @@ xfs_efi_item_recover( * just toss the EFI. */ for (i = 0; i < efip->efi_format.efi_nextents; i++) { - extp = &efip->efi_format.efi_extents[i]; - startblock_fsb = XFS_BB_TO_FSB(mp, - XFS_FSB_TO_DADDR(mp, extp->ext_start)); - if (startblock_fsb == 0 || - extp->ext_len == 0 || - startblock_fsb >= mp->m_sb.sb_dblocks || - extp->ext_len >= mp->m_sb.sb_agblocks) + if (!xfs_efi_validate_ext(mp, + &efip->efi_format.efi_extents[i])) { + XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, + &efip->efi_format, + sizeof(efip->efi_format)); return -EFSCORRUPTED; + } } error = xfs_trans_alloc(mp, &M_RES(mp)->tr_itruncate, 0, 0, 0, &tp); diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c index b7c5783a031c..959ce91a3755 100644 --- a/fs/xfs/xfs_fsops.c +++ b/fs/xfs/xfs_fsops.c @@ -28,7 +28,7 @@ xfs_growfs_data_private( xfs_mount_t *mp, /* mount point for filesystem */ xfs_growfs_data_t *in) /* growfs data input struct */ { - xfs_buf_t *bp; + struct xfs_buf *bp; int error; xfs_agnumber_t nagcount; xfs_agnumber_t nagimax = 0; diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index 2bfbcf28b1bd..b7352bc4c815 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -761,68 +761,26 @@ xfs_inode_inherit_flags2( } /* - * Allocate an inode on disk and return a copy of its in-core version. - * The in-core inode is locked exclusively. Set mode, nlink, and rdev - * appropriately within the inode. The uid and gid for the inode are - * set according to the contents of the given cred structure. - * - * Use xfs_dialloc() to allocate the on-disk inode. If xfs_dialloc() - * has a free inode available, call xfs_iget() to obtain the in-core - * version of the allocated inode. Finally, fill in the inode and - * log its initial contents. In this case, ialloc_context would be - * set to NULL. - * - * If xfs_dialloc() does not have an available inode, it will replenish - * its supply by doing an allocation. Since we can only do one - * allocation within a transaction without deadlocks, we must commit - * the current transaction before returning the inode itself. - * In this case, therefore, we will set ialloc_context and return. - * The caller should then commit the current transaction, start a new - * transaction, and call xfs_ialloc() again to actually get the inode. - * - * To ensure that some other process does not grab the inode that - * was allocated during the first call to xfs_ialloc(), this routine - * also returns the [locked] bp pointing to the head of the freelist - * as ialloc_context. The caller should hold this buffer across - * the commit and pass it back into this routine on the second call. - * - * If we are allocating quota inodes, we do not have a parent inode - * to attach to or associate with (i.e. pip == NULL) because they - * are not linked into the directory structure - they are attached - * directly to the superblock - and so have no parent. + * Initialise a newly allocated inode and return the in-core inode to the + * caller locked exclusively. */ static int -xfs_ialloc( - xfs_trans_t *tp, - xfs_inode_t *pip, - umode_t mode, - xfs_nlink_t nlink, - dev_t rdev, - prid_t prid, - xfs_buf_t **ialloc_context, - xfs_inode_t **ipp) -{ - struct xfs_mount *mp = tp->t_mountp; - xfs_ino_t ino; - xfs_inode_t *ip; - uint flags; - int error; - struct timespec64 tv; - struct inode *inode; - - /* - * Call the space management code to pick - * the on-disk inode to be allocated. - */ - error = xfs_dialloc(tp, pip ? pip->i_ino : 0, mode, - ialloc_context, &ino); - if (error) - return error; - if (*ialloc_context || ino == NULLFSINO) { - *ipp = NULL; - return 0; - } - ASSERT(*ialloc_context == NULL); +xfs_init_new_inode( + struct xfs_trans *tp, + struct xfs_inode *pip, + xfs_ino_t ino, + umode_t mode, + xfs_nlink_t nlink, + dev_t rdev, + prid_t prid, + struct xfs_inode **ipp) +{ + struct xfs_mount *mp = tp->t_mountp; + struct xfs_inode *ip; + unsigned int flags; + int error; + struct timespec64 tv; + struct inode *inode; /* * Protect against obviously corrupt allocation btree records. Later @@ -837,14 +795,13 @@ xfs_ialloc( } /* - * Get the in-core inode with the lock held exclusively. - * This is because we're setting fields here we need - * to prevent others from looking at until we're done. + * Get the in-core inode with the lock held exclusively to prevent + * others from looking at until we're done. */ - error = xfs_iget(mp, tp, ino, XFS_IGET_CREATE, - XFS_ILOCK_EXCL, &ip); + error = xfs_iget(mp, tp, ino, XFS_IGET_CREATE, XFS_ILOCK_EXCL, &ip); if (error) return error; + ASSERT(ip != NULL); inode = VFS_I(ip); inode->i_mode = mode; @@ -932,143 +889,51 @@ xfs_ialloc( } /* - * Allocates a new inode from disk and return a pointer to the - * incore copy. This routine will internally commit the current - * transaction and allocate a new one if the Space Manager needed - * to do an allocation to replenish the inode free-list. - * - * This routine is designed to be called from xfs_create and - * xfs_create_dir. + * Allocates a new inode from disk and return a pointer to the incore copy. This + * routine will internally commit the current transaction and allocate a new one + * if we needed to allocate more on-disk free inodes to perform the requested + * operation. * + * If we are allocating quota inodes, we do not have a parent inode to attach to + * or associate with (i.e. dp == NULL) because they are not linked into the + * directory structure - they are attached directly to the superblock - and so + * have no parent. */ int xfs_dir_ialloc( - xfs_trans_t **tpp, /* input: current transaction; - output: may be a new transaction. */ - xfs_inode_t *dp, /* directory within whose allocate - the inode. */ - umode_t mode, - xfs_nlink_t nlink, - dev_t rdev, - prid_t prid, /* project id */ - xfs_inode_t **ipp) /* pointer to inode; it will be - locked. */ -{ - xfs_trans_t *tp; - xfs_inode_t *ip; - xfs_buf_t *ialloc_context = NULL; - int code; - void *dqinfo; - uint tflags; - - tp = *tpp; - ASSERT(tp->t_flags & XFS_TRANS_PERM_LOG_RES); + struct xfs_trans **tpp, + struct xfs_inode *dp, + umode_t mode, + xfs_nlink_t nlink, + dev_t rdev, + prid_t prid, + struct xfs_inode **ipp) +{ + struct xfs_buf *agibp; + xfs_ino_t parent_ino = dp ? dp->i_ino : 0; + xfs_ino_t ino; + int error; - /* - * xfs_ialloc will return a pointer to an incore inode if - * the Space Manager has an available inode on the free - * list. Otherwise, it will do an allocation and replenish - * the freelist. Since we can only do one allocation per - * transaction without deadlocks, we will need to commit the - * current transaction and start a new one. We will then - * need to call xfs_ialloc again to get the inode. - * - * If xfs_ialloc did an allocation to replenish the freelist, - * it returns the bp containing the head of the freelist as - * ialloc_context. We will hold a lock on it across the - * transaction commit so that no other process can steal - * the inode(s) that we've just allocated. - */ - code = xfs_ialloc(tp, dp, mode, nlink, rdev, prid, &ialloc_context, - &ip); + ASSERT((*tpp)->t_flags & XFS_TRANS_PERM_LOG_RES); /* - * Return an error if we were unable to allocate a new inode. - * This should only happen if we run out of space on disk or - * encounter a disk error. + * Call the space management code to pick the on-disk inode to be + * allocated. */ - if (code) { - *ipp = NULL; - return code; - } - if (!ialloc_context && !ip) { - *ipp = NULL; - return -ENOSPC; - } - - /* - * If the AGI buffer is non-NULL, then we were unable to get an - * inode in one operation. We need to commit the current - * transaction and call xfs_ialloc() again. It is guaranteed - * to succeed the second time. - */ - if (ialloc_context) { - /* - * Normally, xfs_trans_commit releases all the locks. - * We call bhold to hang on to the ialloc_context across - * the commit. Holding this buffer prevents any other - * processes from doing any allocations in this - * allocation group. - */ - xfs_trans_bhold(tp, ialloc_context); - - /* - * We want the quota changes to be associated with the next - * transaction, NOT this one. So, detach the dqinfo from this - * and attach it to the next transaction. - */ - dqinfo = NULL; - tflags = 0; - if (tp->t_dqinfo) { - dqinfo = (void *)tp->t_dqinfo; - tp->t_dqinfo = NULL; - tflags = tp->t_flags & XFS_TRANS_DQ_DIRTY; - tp->t_flags &= ~(XFS_TRANS_DQ_DIRTY); - } - - code = xfs_trans_roll(&tp); - - /* - * Re-attach the quota info that we detached from prev trx. - */ - if (dqinfo) { - tp->t_dqinfo = dqinfo; - tp->t_flags |= tflags; - } - - if (code) { - xfs_buf_relse(ialloc_context); - *tpp = tp; - *ipp = NULL; - return code; - } - xfs_trans_bjoin(tp, ialloc_context); - - /* - * Call ialloc again. Since we've locked out all - * other allocations in this allocation group, - * this call should always succeed. - */ - code = xfs_ialloc(tp, dp, mode, nlink, rdev, prid, - &ialloc_context, &ip); - - /* - * If we get an error at this point, return to the caller - * so that the current transaction can be aborted. - */ - if (code) { - *tpp = tp; - *ipp = NULL; - return code; - } - ASSERT(!ialloc_context && ip); + error = xfs_dialloc_select_ag(tpp, parent_ino, mode, &agibp); + if (error) + return error; - } + if (!agibp) + return -ENOSPC; - *ipp = ip; - *tpp = tp; + /* Allocate an inode from the selected AG */ + error = xfs_dialloc_ag(*tpp, agibp, parent_ino, &ino); + if (error) + return error; + ASSERT(ino != NULLFSINO); - return 0; + return xfs_init_new_inode(*tpp, dp, ino, mode, nlink, rdev, prid, ipp); } /* @@ -1521,7 +1386,7 @@ xfs_itruncate_extents_flags( * the page cache can't scale that far. */ first_unmap_block = XFS_B_TO_FSB(mp, (xfs_ufsize_t)new_size); - if (first_unmap_block >= XFS_MAX_FILEOFF) { + if (!xfs_verify_fileoff(mp, first_unmap_block)) { WARN_ON_ONCE(first_unmap_block > XFS_MAX_FILEOFF); return 0; } diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h index 751a3d1d7d84..eca333f5f715 100644 --- a/fs/xfs/xfs_inode.h +++ b/fs/xfs/xfs_inode.h @@ -407,9 +407,9 @@ void xfs_lock_two_inodes(struct xfs_inode *ip0, uint ip0_mode, xfs_extlen_t xfs_get_extsz_hint(struct xfs_inode *ip); xfs_extlen_t xfs_get_cowextsz_hint(struct xfs_inode *ip); -int xfs_dir_ialloc(struct xfs_trans **, struct xfs_inode *, umode_t, - xfs_nlink_t, dev_t, prid_t, - struct xfs_inode **); +int xfs_dir_ialloc(struct xfs_trans **tpp, struct xfs_inode *dp, umode_t mode, + xfs_nlink_t nlink, dev_t dev, prid_t prid, + struct xfs_inode **ipp); static inline int xfs_itruncate_extents( diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c index 1414ab79eacf..67c8dc9de8aa 100644 --- a/fs/xfs/xfs_iops.c +++ b/fs/xfs/xfs_iops.c @@ -206,10 +206,8 @@ xfs_generic_create( xfs_finish_inode_setup(ip); out_free_acl: - if (default_acl) - posix_acl_release(default_acl); - if (acl) - posix_acl_release(acl); + posix_acl_release(default_acl); + posix_acl_release(acl); return error; out_cleanup_inode: @@ -648,11 +646,10 @@ xfs_vn_change_ok( * Caution: The caller of this function is responsible for calling * setattr_prepare() or otherwise verifying the change is fine. */ -int +static int xfs_setattr_nonsize( struct xfs_inode *ip, - struct iattr *iattr, - int flags) + struct iattr *iattr) { xfs_mount_t *mp = ip->i_mount; struct inode *inode = VFS_I(ip); @@ -809,7 +806,7 @@ xfs_setattr_nonsize( * to attr_set. No previous user of the generic * Posix ACL code seems to care about this issue either. */ - if ((mask & ATTR_MODE) && !(flags & XFS_ATTR_NOACL)) { + if (mask & ATTR_MODE) { error = posix_acl_chmod(inode, inode->i_mode); if (error) return error; @@ -826,22 +823,6 @@ out_dqrele: return error; } -int -xfs_vn_setattr_nonsize( - struct dentry *dentry, - struct iattr *iattr) -{ - struct xfs_inode *ip = XFS_I(d_inode(dentry)); - int error; - - trace_xfs_setattr(ip); - - error = xfs_vn_change_ok(dentry, iattr); - if (error) - return error; - return xfs_setattr_nonsize(ip, iattr, 0); -} - /* * Truncate file. Must have write permission and not be a directory. * @@ -881,7 +862,7 @@ xfs_setattr_size( * Use the regular setattr path to update the timestamps. */ iattr->ia_valid &= ~ATTR_SIZE; - return xfs_setattr_nonsize(ip, iattr, 0); + return xfs_setattr_nonsize(ip, iattr); } /* @@ -1069,11 +1050,11 @@ xfs_vn_setattr( struct dentry *dentry, struct iattr *iattr) { + struct inode *inode = d_inode(dentry); + struct xfs_inode *ip = XFS_I(inode); int error; if (iattr->ia_valid & ATTR_SIZE) { - struct inode *inode = d_inode(dentry); - struct xfs_inode *ip = XFS_I(inode); uint iolock; xfs_ilock(ip, XFS_MMAPLOCK_EXCL); @@ -1088,7 +1069,11 @@ xfs_vn_setattr( error = xfs_vn_setattr_size(dentry, iattr); xfs_iunlock(ip, XFS_MMAPLOCK_EXCL); } else { - error = xfs_vn_setattr_nonsize(dentry, iattr); + trace_xfs_setattr(ip); + + error = xfs_vn_change_ok(dentry, iattr); + if (!error) + error = xfs_setattr_nonsize(ip, iattr); } return error; diff --git a/fs/xfs/xfs_iops.h b/fs/xfs/xfs_iops.h index 4d24ff309f59..99ca745c1071 100644 --- a/fs/xfs/xfs_iops.h +++ b/fs/xfs/xfs_iops.h @@ -13,15 +13,7 @@ extern const struct file_operations xfs_dir_file_operations; extern ssize_t xfs_vn_listxattr(struct dentry *, char *data, size_t size); -/* - * Internal setattr interfaces. - */ -#define XFS_ATTR_NOACL 0x01 /* Don't call posix_acl_chmod */ - extern void xfs_setattr_time(struct xfs_inode *ip, struct iattr *iattr); -extern int xfs_setattr_nonsize(struct xfs_inode *ip, struct iattr *vap, - int flags); -extern int xfs_vn_setattr_nonsize(struct dentry *dentry, struct iattr *vap); extern int xfs_vn_setattr_size(struct dentry *dentry, struct iattr *vap); #endif /* __XFS_IOPS_H__ */ diff --git a/fs/xfs/xfs_iwalk.c b/fs/xfs/xfs_iwalk.c index 2a45138831e3..eae3aff9bc97 100644 --- a/fs/xfs/xfs_iwalk.c +++ b/fs/xfs/xfs_iwalk.c @@ -363,7 +363,7 @@ xfs_iwalk_run_callbacks( /* Delete cursor but remember the last record we cached... */ xfs_iwalk_del_inobt(tp, curpp, agi_bpp, 0); irec = &iwag->recs[iwag->nr_recs - 1]; - ASSERT(next_agino == irec->ir_startino + XFS_INODES_PER_CHUNK); + ASSERT(next_agino >= irec->ir_startino + XFS_INODES_PER_CHUNK); error = xfs_iwalk_ag_recs(iwag); if (error) diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c index 87886b7f77da..97f31308de03 100644 --- a/fs/xfs/xfs_log_recover.c +++ b/fs/xfs/xfs_log_recover.c @@ -2559,8 +2559,11 @@ xlog_recover_process_intents( spin_unlock(&ailp->ail_lock); error = lip->li_ops->iop_recover(lip, &capture_list); spin_lock(&ailp->ail_lock); - if (error) + if (error) { + trace_xlog_intent_recovery_failed(log->l_mp, error, + lip->li_ops->iop_recover); break; + } } xfs_trans_ail_cursor_done(&cur); @@ -2628,7 +2631,7 @@ xlog_recover_clear_agi_bucket( { xfs_trans_t *tp; xfs_agi_t *agi; - xfs_buf_t *agibp; + struct xfs_buf *agibp; int offset; int error; @@ -2746,7 +2749,7 @@ xlog_recover_process_iunlinks( xfs_mount_t *mp; xfs_agnumber_t agno; xfs_agi_t *agi; - xfs_buf_t *agibp; + struct xfs_buf *agibp; xfs_agino_t agino; int bucket; int error; @@ -3498,8 +3501,8 @@ xlog_recover_check_summary( struct xlog *log) { xfs_mount_t *mp; - xfs_buf_t *agfbp; - xfs_buf_t *agibp; + struct xfs_buf *agfbp; + struct xfs_buf *agibp; xfs_agnumber_t agno; uint64_t freeblks; uint64_t itotal; diff --git a/fs/xfs/xfs_qm.c b/fs/xfs/xfs_qm.c index b2a9abee8b2b..c134eb4aeaa8 100644 --- a/fs/xfs/xfs_qm.c +++ b/fs/xfs/xfs_qm.c @@ -737,15 +737,15 @@ xfs_qm_destroy_quotainfo( */ STATIC int xfs_qm_qino_alloc( - xfs_mount_t *mp, - xfs_inode_t **ip, - uint flags) + struct xfs_mount *mp, + struct xfs_inode **ipp, + unsigned int flags) { - xfs_trans_t *tp; - int error; - bool need_alloc = true; + struct xfs_trans *tp; + int error; + bool need_alloc = true; - *ip = NULL; + *ipp = NULL; /* * With superblock that doesn't have separate pquotino, we * share an inode between gquota and pquota. If the on-disk @@ -771,7 +771,7 @@ xfs_qm_qino_alloc( return -EFSCORRUPTED; } if (ino != NULLFSINO) { - error = xfs_iget(mp, NULL, ino, 0, 0, ip); + error = xfs_iget(mp, NULL, ino, 0, 0, ipp); if (error) return error; mp->m_sb.sb_gquotino = NULLFSINO; @@ -787,7 +787,7 @@ xfs_qm_qino_alloc( return error; if (need_alloc) { - error = xfs_dir_ialloc(&tp, NULL, S_IFREG, 1, 0, 0, ip); + error = xfs_dir_ialloc(&tp, NULL, S_IFREG, 1, 0, 0, ipp); if (error) { xfs_trans_cancel(tp); return error; @@ -812,11 +812,11 @@ xfs_qm_qino_alloc( mp->m_sb.sb_qflags = mp->m_qflags & XFS_ALL_QUOTA_ACCT; } if (flags & XFS_QMOPT_UQUOTA) - mp->m_sb.sb_uquotino = (*ip)->i_ino; + mp->m_sb.sb_uquotino = (*ipp)->i_ino; else if (flags & XFS_QMOPT_GQUOTA) - mp->m_sb.sb_gquotino = (*ip)->i_ino; + mp->m_sb.sb_gquotino = (*ipp)->i_ino; else - mp->m_sb.sb_pquotino = (*ip)->i_ino; + mp->m_sb.sb_pquotino = (*ipp)->i_ino; spin_unlock(&mp->m_sb_lock); xfs_log_sb(tp); @@ -826,7 +826,7 @@ xfs_qm_qino_alloc( xfs_alert(mp, "%s failed (error %d)!", __func__, error); } if (need_alloc) - xfs_finish_inode_setup(*ip); + xfs_finish_inode_setup(*ipp); return error; } diff --git a/fs/xfs/xfs_refcount_item.c b/fs/xfs/xfs_refcount_item.c index 7529eb63ce94..07ebccbbf4df 100644 --- a/fs/xfs/xfs_refcount_item.c +++ b/fs/xfs/xfs_refcount_item.c @@ -417,6 +417,31 @@ const struct xfs_defer_op_type xfs_refcount_update_defer_type = { .cancel_item = xfs_refcount_update_cancel_item, }; +/* Is this recovered CUI ok? */ +static inline bool +xfs_cui_validate_phys( + struct xfs_mount *mp, + struct xfs_phys_extent *refc) +{ + if (!xfs_sb_version_hasreflink(&mp->m_sb)) + return false; + + if (refc->pe_flags & ~XFS_REFCOUNT_EXTENT_FLAGS) + return false; + + switch (refc->pe_flags & XFS_REFCOUNT_EXTENT_TYPE_MASK) { + case XFS_REFCOUNT_INCREASE: + case XFS_REFCOUNT_DECREASE: + case XFS_REFCOUNT_ALLOC_COW: + case XFS_REFCOUNT_FREE_COW: + break; + default: + return false; + } + + return xfs_verify_fsbext(mp, refc->pe_startblock, refc->pe_len); +} + /* * Process a refcount update intent item that was recovered from the log. * We need to update the refcountbt. @@ -433,11 +458,9 @@ xfs_cui_item_recover( struct xfs_trans *tp; struct xfs_btree_cur *rcur = NULL; struct xfs_mount *mp = lip->li_mountp; - xfs_fsblock_t startblock_fsb; xfs_fsblock_t new_fsb; xfs_extlen_t new_len; unsigned int refc_type; - bool op_ok; bool requeue_only = false; enum xfs_refcount_intent_type type; int i; @@ -449,26 +472,13 @@ xfs_cui_item_recover( * just toss the CUI. */ for (i = 0; i < cuip->cui_format.cui_nextents; i++) { - refc = &cuip->cui_format.cui_extents[i]; - startblock_fsb = XFS_BB_TO_FSB(mp, - XFS_FSB_TO_DADDR(mp, refc->pe_startblock)); - switch (refc->pe_flags & XFS_REFCOUNT_EXTENT_TYPE_MASK) { - case XFS_REFCOUNT_INCREASE: - case XFS_REFCOUNT_DECREASE: - case XFS_REFCOUNT_ALLOC_COW: - case XFS_REFCOUNT_FREE_COW: - op_ok = true; - break; - default: - op_ok = false; - break; - } - if (!op_ok || startblock_fsb == 0 || - refc->pe_len == 0 || - startblock_fsb >= mp->m_sb.sb_dblocks || - refc->pe_len >= mp->m_sb.sb_agblocks || - (refc->pe_flags & ~XFS_REFCOUNT_EXTENT_FLAGS)) + if (!xfs_cui_validate_phys(mp, + &cuip->cui_format.cui_extents[i])) { + XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, + &cuip->cui_format, + sizeof(cuip->cui_format)); return -EFSCORRUPTED; + } } /* diff --git a/fs/xfs/xfs_rmap_item.c b/fs/xfs/xfs_rmap_item.c index 7adc996ca6e3..49cebd68b672 100644 --- a/fs/xfs/xfs_rmap_item.c +++ b/fs/xfs/xfs_rmap_item.c @@ -460,6 +460,42 @@ const struct xfs_defer_op_type xfs_rmap_update_defer_type = { .cancel_item = xfs_rmap_update_cancel_item, }; +/* Is this recovered RUI ok? */ +static inline bool +xfs_rui_validate_map( + struct xfs_mount *mp, + struct xfs_map_extent *rmap) +{ + if (!xfs_sb_version_hasrmapbt(&mp->m_sb)) + return false; + + if (rmap->me_flags & ~XFS_RMAP_EXTENT_FLAGS) + return false; + + switch (rmap->me_flags & XFS_RMAP_EXTENT_TYPE_MASK) { + case XFS_RMAP_EXTENT_MAP: + case XFS_RMAP_EXTENT_MAP_SHARED: + case XFS_RMAP_EXTENT_UNMAP: + case XFS_RMAP_EXTENT_UNMAP_SHARED: + case XFS_RMAP_EXTENT_CONVERT: + case XFS_RMAP_EXTENT_CONVERT_SHARED: + case XFS_RMAP_EXTENT_ALLOC: + case XFS_RMAP_EXTENT_FREE: + break; + default: + return false; + } + + if (!XFS_RMAP_NON_INODE_OWNER(rmap->me_owner) && + !xfs_verify_ino(mp, rmap->me_owner)) + return false; + + if (!xfs_verify_fileext(mp, rmap->me_startoff, rmap->me_len)) + return false; + + return xfs_verify_fsbext(mp, rmap->me_startblock, rmap->me_len); +} + /* * Process an rmap update intent item that was recovered from the log. * We need to update the rmapbt. @@ -475,10 +511,8 @@ xfs_rui_item_recover( struct xfs_trans *tp; struct xfs_btree_cur *rcur = NULL; struct xfs_mount *mp = lip->li_mountp; - xfs_fsblock_t startblock_fsb; enum xfs_rmap_intent_type type; xfs_exntst_t state; - bool op_ok; int i; int whichfork; int error = 0; @@ -489,30 +523,13 @@ xfs_rui_item_recover( * just toss the RUI. */ for (i = 0; i < ruip->rui_format.rui_nextents; i++) { - rmap = &ruip->rui_format.rui_extents[i]; - startblock_fsb = XFS_BB_TO_FSB(mp, - XFS_FSB_TO_DADDR(mp, rmap->me_startblock)); - switch (rmap->me_flags & XFS_RMAP_EXTENT_TYPE_MASK) { - case XFS_RMAP_EXTENT_MAP: - case XFS_RMAP_EXTENT_MAP_SHARED: - case XFS_RMAP_EXTENT_UNMAP: - case XFS_RMAP_EXTENT_UNMAP_SHARED: - case XFS_RMAP_EXTENT_CONVERT: - case XFS_RMAP_EXTENT_CONVERT_SHARED: - case XFS_RMAP_EXTENT_ALLOC: - case XFS_RMAP_EXTENT_FREE: - op_ok = true; - break; - default: - op_ok = false; - break; - } - if (!op_ok || startblock_fsb == 0 || - rmap->me_len == 0 || - startblock_fsb >= mp->m_sb.sb_dblocks || - rmap->me_len >= mp->m_sb.sb_agblocks || - (rmap->me_flags & ~XFS_RMAP_EXTENT_FLAGS)) + if (!xfs_rui_validate_map(mp, + &ruip->rui_format.rui_extents[i])) { + XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, + &ruip->rui_format, + sizeof(ruip->rui_format)); return -EFSCORRUPTED; + } } error = xfs_trans_alloc(mp, &M_RES(mp)->tr_itruncate, diff --git a/fs/xfs/xfs_rtalloc.c b/fs/xfs/xfs_rtalloc.c index ede1baf31413..b4999fb01ff7 100644 --- a/fs/xfs/xfs_rtalloc.c +++ b/fs/xfs/xfs_rtalloc.c @@ -32,7 +32,7 @@ xfs_rtget_summary( xfs_trans_t *tp, /* transaction pointer */ int log, /* log2 of extent size */ xfs_rtblock_t bbno, /* bitmap block number */ - xfs_buf_t **rbpp, /* in/out: summary block buffer */ + struct xfs_buf **rbpp, /* in/out: summary block buffer */ xfs_fsblock_t *rsb, /* in/out: summary block number */ xfs_suminfo_t *sum) /* out: summary info for this block */ { @@ -50,7 +50,7 @@ xfs_rtany_summary( int low, /* low log2 extent size */ int high, /* high log2 extent size */ xfs_rtblock_t bbno, /* bitmap block number */ - xfs_buf_t **rbpp, /* in/out: summary block buffer */ + struct xfs_buf **rbpp, /* in/out: summary block buffer */ xfs_fsblock_t *rsb, /* in/out: summary block number */ int *stat) /* out: any good extents here? */ { @@ -104,7 +104,7 @@ xfs_rtcopy_summary( xfs_trans_t *tp) /* transaction pointer */ { xfs_rtblock_t bbno; /* bitmap block number */ - xfs_buf_t *bp; /* summary buffer */ + struct xfs_buf *bp; /* summary buffer */ int error; /* error return value */ int log; /* summary level number (log length) */ xfs_suminfo_t sum; /* summary data */ @@ -144,7 +144,7 @@ xfs_rtallocate_range( xfs_trans_t *tp, /* transaction pointer */ xfs_rtblock_t start, /* start block to allocate */ xfs_extlen_t len, /* length to allocate */ - xfs_buf_t **rbpp, /* in/out: summary block buffer */ + struct xfs_buf **rbpp, /* in/out: summary block buffer */ xfs_fsblock_t *rsb) /* in/out: summary block number */ { xfs_rtblock_t end; /* end of the allocated extent */ @@ -226,7 +226,7 @@ xfs_rtallocate_extent_block( xfs_extlen_t maxlen, /* maximum length to allocate */ xfs_extlen_t *len, /* out: actual length allocated */ xfs_rtblock_t *nextp, /* out: next block to try */ - xfs_buf_t **rbpp, /* in/out: summary block buffer */ + struct xfs_buf **rbpp, /* in/out: summary block buffer */ xfs_fsblock_t *rsb, /* in/out: summary block number */ xfs_extlen_t prod, /* extent product factor */ xfs_rtblock_t *rtblock) /* out: start block allocated */ @@ -345,7 +345,7 @@ xfs_rtallocate_extent_exact( xfs_extlen_t minlen, /* minimum length to allocate */ xfs_extlen_t maxlen, /* maximum length to allocate */ xfs_extlen_t *len, /* out: actual length allocated */ - xfs_buf_t **rbpp, /* in/out: summary block buffer */ + struct xfs_buf **rbpp, /* in/out: summary block buffer */ xfs_fsblock_t *rsb, /* in/out: summary block number */ xfs_extlen_t prod, /* extent product factor */ xfs_rtblock_t *rtblock) /* out: start block allocated */ @@ -424,7 +424,7 @@ xfs_rtallocate_extent_near( xfs_extlen_t minlen, /* minimum length to allocate */ xfs_extlen_t maxlen, /* maximum length to allocate */ xfs_extlen_t *len, /* out: actual length allocated */ - xfs_buf_t **rbpp, /* in/out: summary block buffer */ + struct xfs_buf **rbpp, /* in/out: summary block buffer */ xfs_fsblock_t *rsb, /* in/out: summary block number */ xfs_extlen_t prod, /* extent product factor */ xfs_rtblock_t *rtblock) /* out: start block allocated */ @@ -626,7 +626,7 @@ xfs_rtallocate_extent_size( xfs_extlen_t minlen, /* minimum length to allocate */ xfs_extlen_t maxlen, /* maximum length to allocate */ xfs_extlen_t *len, /* out: actual length allocated */ - xfs_buf_t **rbpp, /* in/out: summary block buffer */ + struct xfs_buf **rbpp, /* in/out: summary block buffer */ xfs_fsblock_t *rsb, /* in/out: summary block number */ xfs_extlen_t prod, /* extent product factor */ xfs_rtblock_t *rtblock) /* out: start block allocated */ @@ -900,7 +900,7 @@ xfs_growfs_rt( xfs_growfs_rt_t *in) /* growfs rt input struct */ { xfs_rtblock_t bmbno; /* bitmap block number */ - xfs_buf_t *bp; /* temporary buffer */ + struct xfs_buf *bp; /* temporary buffer */ int error; /* error return value */ xfs_mount_t *nmp; /* new (fake) mount structure */ xfs_rfsblock_t nrblocks; /* new number of realtime blocks */ @@ -1151,7 +1151,7 @@ xfs_rtallocate_extent( int error; /* error value */ xfs_rtblock_t r; /* result allocated block */ xfs_fsblock_t sb; /* summary file block number */ - xfs_buf_t *sumbp; /* summary file block buffer */ + struct xfs_buf *sumbp; /* summary file block buffer */ ASSERT(xfs_isilocked(mp->m_rbmip, XFS_ILOCK_EXCL)); ASSERT(minlen > 0 && minlen <= maxlen); diff --git a/fs/xfs/xfs_rtalloc.h b/fs/xfs/xfs_rtalloc.h index 93e77b221355..ed885620589c 100644 --- a/fs/xfs/xfs_rtalloc.h +++ b/fs/xfs/xfs_rtalloc.h @@ -115,10 +115,10 @@ int xfs_rtmodify_range(struct xfs_mount *mp, struct xfs_trans *tp, xfs_rtblock_t start, xfs_extlen_t len, int val); int xfs_rtmodify_summary_int(struct xfs_mount *mp, struct xfs_trans *tp, int log, xfs_rtblock_t bbno, int delta, - xfs_buf_t **rbpp, xfs_fsblock_t *rsb, + struct xfs_buf **rbpp, xfs_fsblock_t *rsb, xfs_suminfo_t *sum); int xfs_rtmodify_summary(struct xfs_mount *mp, struct xfs_trans *tp, int log, - xfs_rtblock_t bbno, int delta, xfs_buf_t **rbpp, + xfs_rtblock_t bbno, int delta, struct xfs_buf **rbpp, xfs_fsblock_t *rsb); int xfs_rtfree_range(struct xfs_mount *mp, struct xfs_trans *tp, xfs_rtblock_t start, xfs_extlen_t len, diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c index e3e229e52512..813be879a5e5 100644 --- a/fs/xfs/xfs_super.c +++ b/fs/xfs/xfs_super.c @@ -199,10 +199,12 @@ xfs_fs_show_options( seq_printf(m, ",swidth=%d", (int)XFS_FSB_TO_BB(mp, mp->m_swidth)); - if (mp->m_qflags & (XFS_UQUOTA_ACCT|XFS_UQUOTA_ENFD)) - seq_puts(m, ",usrquota"); - else if (mp->m_qflags & XFS_UQUOTA_ACCT) - seq_puts(m, ",uqnoenforce"); + if (mp->m_qflags & XFS_UQUOTA_ACCT) { + if (mp->m_qflags & XFS_UQUOTA_ENFD) + seq_puts(m, ",usrquota"); + else + seq_puts(m, ",uqnoenforce"); + } if (mp->m_qflags & XFS_PQUOTA_ACCT) { if (mp->m_qflags & XFS_PQUOTA_ENFD) @@ -1159,7 +1161,7 @@ suffix_kstrtoint( * NOTE: mp->m_super is NULL here! */ static int -xfs_fc_parse_param( +xfs_fs_parse_param( struct fs_context *fc, struct fs_parameter *param) { @@ -1317,7 +1319,7 @@ xfs_fc_parse_param( } static int -xfs_fc_validate_params( +xfs_fs_validate_params( struct xfs_mount *mp) { /* @@ -1386,7 +1388,7 @@ xfs_fc_validate_params( } static int -xfs_fc_fill_super( +xfs_fs_fill_super( struct super_block *sb, struct fs_context *fc) { @@ -1396,7 +1398,7 @@ xfs_fc_fill_super( mp->m_super = sb; - error = xfs_fc_validate_params(mp); + error = xfs_fs_validate_params(mp); if (error) goto out_free_names; @@ -1467,6 +1469,45 @@ xfs_fc_fill_super( #endif } + /* Filesystem claims it needs repair, so refuse the mount. */ + if (xfs_sb_version_needsrepair(&mp->m_sb)) { + xfs_warn(mp, "Filesystem needs repair. Please run xfs_repair."); + error = -EFSCORRUPTED; + goto out_free_sb; + } + + /* + * Don't touch the filesystem if a user tool thinks it owns the primary + * superblock. mkfs doesn't clear the flag from secondary supers, so + * we don't check them at all. + */ + if (mp->m_sb.sb_inprogress) { + xfs_warn(mp, "Offline file system operation in progress!"); + error = -EFSCORRUPTED; + goto out_free_sb; + } + + /* + * Until this is fixed only page-sized or smaller data blocks work. + */ + if (mp->m_sb.sb_blocksize > PAGE_SIZE) { + xfs_warn(mp, + "File system with blocksize %d bytes. " + "Only pagesize (%ld) or less will currently work.", + mp->m_sb.sb_blocksize, PAGE_SIZE); + error = -ENOSYS; + goto out_free_sb; + } + + /* Ensure this filesystem fits in the page cache limits */ + if (xfs_sb_validate_fsb_count(&mp->m_sb, mp->m_sb.sb_dblocks) || + xfs_sb_validate_fsb_count(&mp->m_sb, mp->m_sb.sb_rblocks)) { + xfs_warn(mp, + "file system too large to be mounted on this system."); + error = -EFBIG; + goto out_free_sb; + } + /* * XFS block mappings use 54 bits to store the logical block offset. * This should suffice to handle the maximum file size that the VFS @@ -1478,7 +1519,7 @@ xfs_fc_fill_super( * Avoid integer overflow by comparing the maximum bmbt offset to the * maximum pagecache offset in units of fs blocks. */ - if (XFS_B_TO_FSBT(mp, MAX_LFS_FILESIZE) > XFS_MAX_FILEOFF) { + if (!xfs_verify_fileoff(mp, XFS_B_TO_FSBT(mp, MAX_LFS_FILESIZE))) { xfs_warn(mp, "MAX_LFS_FILESIZE block offset (%llu) exceeds extent map maximum (%llu)!", XFS_B_TO_FSBT(mp, MAX_LFS_FILESIZE), @@ -1621,10 +1662,10 @@ xfs_fc_fill_super( } static int -xfs_fc_get_tree( +xfs_fs_get_tree( struct fs_context *fc) { - return get_tree_bdev(fc, xfs_fc_fill_super); + return get_tree_bdev(fc, xfs_fs_fill_super); } static int @@ -1743,7 +1784,7 @@ xfs_remount_ro( * silently ignore all options that we can't actually change. */ static int -xfs_fc_reconfigure( +xfs_fs_reconfigure( struct fs_context *fc) { struct xfs_mount *mp = XFS_M(fc->root->d_sb); @@ -1756,7 +1797,7 @@ xfs_fc_reconfigure( if (XFS_SB_VERSION_NUM(&mp->m_sb) == XFS_SB_VERSION_5) fc->sb_flags |= SB_I_VERSION; - error = xfs_fc_validate_params(new_mp); + error = xfs_fs_validate_params(new_mp); if (error) return error; @@ -1793,7 +1834,7 @@ xfs_fc_reconfigure( return 0; } -static void xfs_fc_free( +static void xfs_fs_free( struct fs_context *fc) { struct xfs_mount *mp = fc->s_fs_info; @@ -1809,10 +1850,10 @@ static void xfs_fc_free( } static const struct fs_context_operations xfs_context_ops = { - .parse_param = xfs_fc_parse_param, - .get_tree = xfs_fc_get_tree, - .reconfigure = xfs_fc_reconfigure, - .free = xfs_fc_free, + .parse_param = xfs_fs_parse_param, + .get_tree = xfs_fs_get_tree, + .reconfigure = xfs_fs_reconfigure, + .free = xfs_fs_free, }; static int xfs_init_fs_context( diff --git a/fs/xfs/xfs_symlink.c b/fs/xfs/xfs_symlink.c index 8e88a7ca387e..1f43fd7f3209 100644 --- a/fs/xfs/xfs_symlink.c +++ b/fs/xfs/xfs_symlink.c @@ -154,7 +154,7 @@ xfs_symlink( const char *cur_chunk; int byte_cnt; int n; - xfs_buf_t *bp; + struct xfs_buf *bp; prid_t prid; struct xfs_dquot *udqp = NULL; struct xfs_dquot *gdqp = NULL; @@ -365,7 +365,7 @@ STATIC int xfs_inactive_symlink_rmt( struct xfs_inode *ip) { - xfs_buf_t *bp; + struct xfs_buf *bp; int done; int error; int i; diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h index 86951652d3ed..5a263ae3d4f0 100644 --- a/fs/xfs/xfs_trace.h +++ b/fs/xfs/xfs_trace.h @@ -103,6 +103,24 @@ DEFINE_ATTR_LIST_EVENT(xfs_attr_list_notfound); DEFINE_ATTR_LIST_EVENT(xfs_attr_leaf_list); DEFINE_ATTR_LIST_EVENT(xfs_attr_node_list); +TRACE_EVENT(xlog_intent_recovery_failed, + TP_PROTO(struct xfs_mount *mp, int error, void *function), + TP_ARGS(mp, error, function), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(int, error) + __field(void *, function) + ), + TP_fast_assign( + __entry->dev = mp->m_super->s_dev; + __entry->error = error; + __entry->function = function; + ), + TP_printk("dev %d:%d error %d function %pS", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->error, __entry->function) +); + DECLARE_EVENT_CLASS(xfs_perag_class, TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, int refcount, unsigned long caller_ip), diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c index c94e71f741b6..e72730f85af1 100644 --- a/fs/xfs/xfs_trans.c +++ b/fs/xfs/xfs_trans.c @@ -465,7 +465,7 @@ xfs_trans_apply_sb_deltas( xfs_trans_t *tp) { xfs_dsb_t *sbp; - xfs_buf_t *bp; + struct xfs_buf *bp; int whole = 0; bp = xfs_trans_getsb(tp); diff --git a/fs/xfs/xfs_trans_buf.c b/fs/xfs/xfs_trans_buf.c index 42d63b830cb9..9aced0a00003 100644 --- a/fs/xfs/xfs_trans_buf.c +++ b/fs/xfs/xfs_trans_buf.c @@ -121,7 +121,7 @@ xfs_trans_get_buf_map( xfs_buf_flags_t flags, struct xfs_buf **bpp) { - xfs_buf_t *bp; + struct xfs_buf *bp; struct xfs_buf_log_item *bip; int error; @@ -401,7 +401,7 @@ xfs_trans_brelse( void xfs_trans_bhold( xfs_trans_t *tp, - xfs_buf_t *bp) + struct xfs_buf *bp) { struct xfs_buf_log_item *bip = bp->b_log_item; @@ -422,7 +422,7 @@ xfs_trans_bhold( void xfs_trans_bhold_release( xfs_trans_t *tp, - xfs_buf_t *bp) + struct xfs_buf *bp) { struct xfs_buf_log_item *bip = bp->b_log_item; @@ -538,7 +538,7 @@ xfs_trans_log_buf( void xfs_trans_binval( xfs_trans_t *tp, - xfs_buf_t *bp) + struct xfs_buf *bp) { struct xfs_buf_log_item *bip = bp->b_log_item; int i; @@ -593,7 +593,7 @@ xfs_trans_binval( void xfs_trans_inode_buf( xfs_trans_t *tp, - xfs_buf_t *bp) + struct xfs_buf *bp) { struct xfs_buf_log_item *bip = bp->b_log_item; @@ -618,7 +618,7 @@ xfs_trans_inode_buf( void xfs_trans_stale_inode_buf( xfs_trans_t *tp, - xfs_buf_t *bp) + struct xfs_buf *bp) { struct xfs_buf_log_item *bip = bp->b_log_item; @@ -643,7 +643,7 @@ xfs_trans_stale_inode_buf( void xfs_trans_inode_alloc_buf( xfs_trans_t *tp, - xfs_buf_t *bp) + struct xfs_buf *bp) { struct xfs_buf_log_item *bip = bp->b_log_item; @@ -737,7 +737,7 @@ xfs_trans_buf_copy_type( void xfs_trans_dquot_buf( xfs_trans_t *tp, - xfs_buf_t *bp, + struct xfs_buf *bp, uint type) { struct xfs_buf_log_item *bip = bp->b_log_item; diff --git a/fs/xfs/xfs_trans_dquot.c b/fs/xfs/xfs_trans_dquot.c index fe45b0c3970c..28b8ac701919 100644 --- a/fs/xfs/xfs_trans_dquot.c +++ b/fs/xfs/xfs_trans_dquot.c @@ -84,13 +84,6 @@ xfs_trans_dup_dqinfo( xfs_trans_alloc_dqinfo(ntp); - /* - * Because the quota blk reservation is carried forward, - * it is also necessary to carry forward the DQ_DIRTY flag. - */ - if (otp->t_flags & XFS_TRANS_DQ_DIRTY) - ntp->t_flags |= XFS_TRANS_DQ_DIRTY; - for (j = 0; j < XFS_QM_TRANS_DQTYPES; j++) { oqa = otp->t_dqinfo->dqs[j]; nqa = ntp->t_dqinfo->dqs[j]; @@ -143,9 +136,6 @@ xfs_trans_mod_dquot_byino( xfs_is_quota_inode(&mp->m_sb, ip->i_ino)) return; - if (tp->t_dqinfo == NULL) - xfs_trans_alloc_dqinfo(tp); - if (XFS_IS_UQUOTA_ON(mp) && ip->i_udquot) (void) xfs_trans_mod_dquot(tp, ip->i_udquot, field, delta); if (XFS_IS_GQUOTA_ON(mp) && ip->i_gdquot) @@ -204,6 +194,9 @@ xfs_trans_mod_dquot( ASSERT(XFS_IS_QUOTA_RUNNING(tp->t_mountp)); qtrx = NULL; + if (!delta) + return; + if (tp->t_dqinfo == NULL) xfs_trans_alloc_dqinfo(tp); /* @@ -215,10 +208,8 @@ xfs_trans_mod_dquot( if (qtrx->qt_dquot == NULL) qtrx->qt_dquot = dqp; - if (delta) { - trace_xfs_trans_mod_dquot_before(qtrx); - trace_xfs_trans_mod_dquot(tp, dqp, field, delta); - } + trace_xfs_trans_mod_dquot_before(qtrx); + trace_xfs_trans_mod_dquot(tp, dqp, field, delta); switch (field) { /* regular disk blk reservation */ @@ -271,10 +262,7 @@ xfs_trans_mod_dquot( ASSERT(0); } - if (delta) - trace_xfs_trans_mod_dquot_after(qtrx); - - tp->t_flags |= XFS_TRANS_DQ_DIRTY; + trace_xfs_trans_mod_dquot_after(qtrx); } @@ -351,7 +339,7 @@ xfs_trans_apply_dquot_deltas( int64_t totalbdelta; int64_t totalrtbdelta; - if (!(tp->t_flags & XFS_TRANS_DQ_DIRTY)) + if (!tp->t_dqinfo) return; ASSERT(tp->t_dqinfo); @@ -493,7 +481,7 @@ xfs_trans_unreserve_and_mod_dquots( struct xfs_dqtrx *qtrx, *qa; bool locked; - if (!tp->t_dqinfo || !(tp->t_flags & XFS_TRANS_DQ_DIRTY)) + if (!tp->t_dqinfo) return; for (j = 0; j < XFS_QM_TRANS_DQTYPES; j++) { @@ -698,16 +686,10 @@ xfs_trans_dqresv( * because we don't have the luxury of a transaction envelope then. */ if (tp) { - ASSERT(tp->t_dqinfo); ASSERT(flags & XFS_QMOPT_RESBLK_MASK); - if (nblks != 0) - xfs_trans_mod_dquot(tp, dqp, - flags & XFS_QMOPT_RESBLK_MASK, - nblks); - if (ninos != 0) - xfs_trans_mod_dquot(tp, dqp, - XFS_TRANS_DQ_RES_INOS, - ninos); + xfs_trans_mod_dquot(tp, dqp, flags & XFS_QMOPT_RESBLK_MASK, + nblks); + xfs_trans_mod_dquot(tp, dqp, XFS_TRANS_DQ_RES_INOS, ninos); } ASSERT(dqp->q_blk.reserved >= dqp->q_blk.count); ASSERT(dqp->q_rtb.reserved >= dqp->q_rtb.count); @@ -752,9 +734,6 @@ xfs_trans_reserve_quota_bydquots( if (!XFS_IS_QUOTA_RUNNING(mp) || !XFS_IS_QUOTA_ON(mp)) return 0; - if (tp && tp->t_dqinfo == NULL) - xfs_trans_alloc_dqinfo(tp); - ASSERT(flags & XFS_QMOPT_RESBLK_MASK); if (udqp) { |