diff options
author | Lorry Tar Creator <lorry-tar-importer@baserock.org> | 2015-02-17 17:25:57 +0000 |
---|---|---|
committer | <> | 2015-03-17 16:26:24 +0000 |
commit | 780b92ada9afcf1d58085a83a0b9e6bc982203d1 (patch) | |
tree | 598f8b9fa431b228d29897e798de4ac0c1d3d970 /src/fileops | |
parent | 7a2660ba9cc2dc03a69ddfcfd95369395cc87444 (diff) | |
download | berkeleydb-db-6.1.23.tar.gz |
Diffstat (limited to 'src/fileops')
-rw-r--r-- | src/fileops/fileops.src | 91 | ||||
-rw-r--r-- | src/fileops/fileops_auto.c | 72 | ||||
-rw-r--r-- | src/fileops/fileops_autop.c | 122 | ||||
-rw-r--r-- | src/fileops/fop_basic.c | 216 | ||||
-rw-r--r-- | src/fileops/fop_rec.c | 759 | ||||
-rw-r--r-- | src/fileops/fop_util.c | 89 |
6 files changed, 1258 insertions, 91 deletions
diff --git a/src/fileops/fileops.src b/src/fileops/fileops.src index cdb6af27..3cb874b7 100644 --- a/src/fileops/fileops.src +++ b/src/fileops/fileops.src @@ -1,7 +1,7 @@ /*- * See the file LICENSE for redistribution information. * - * Copyright (c) 2001, 2012 Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2001, 2015 Oracle and/or its affiliates. All rights reserved. * * $Id$ */ @@ -30,7 +30,14 @@ ARG appname u_int32_t lu ARG mode u_int32_t o END -BEGIN create 48 143 +BEGIN_COMPAT create 60 143 +DBT name DBT s +DBT dirname DBT s +ARG appname u_int32_t lu +ARG mode u_int32_t o +END + +BEGIN create 60p1 143 DBT name DBT s DBT dirname DBT s ARG appname u_int32_t lu @@ -43,7 +50,13 @@ END * name: name in the file system * appname: indicates if the name needs to go through __db_appname */ -BEGIN remove 42 144 +BEGIN_COMPAT remove 60 144 +DBT name DBT s +DBT fid DBT s +ARG appname u_int32_t lu +END + +BEGIN remove 60p1 144 DBT name DBT s DBT fid DBT s ARG appname u_int32_t lu @@ -71,7 +84,18 @@ DBT page DBT s ARG flag u_int32_t lu END -BEGIN write 48 145 +BEGIN_COMPAT write 60 145 +DBT name DBT s +DBT dirname DBT s +ARG appname u_int32_t lu +ARG pgsize u_int32_t lu +ARG pageno db_pgno_t lu +ARG offset u_int32_t lu +DBT page DBT s +ARG flag u_int32_t lu +END + +BEGIN write 60p1 145 DBT name DBT s DBT dirname DBT s ARG appname u_int32_t lu @@ -83,6 +107,42 @@ ARG flag u_int32_t lu END /* + * write_file: log the writing of data into a file. + * + * name: file containing the data. + * appname: indicates if the name needs to go through __db_appname + * offset_lo: offset in the file, low part of a 64 bit integer. + * offset_hi: offset in the file, high part of a 64 bit integer. + * old_data: Data being overwritten, if there is any + * new_data: Data being written to the file. + * flag: DB_FOP_APPEND (0x00000001), DB_FOP_CREATE (0x00000002) and + * DB_FOP_REDO (0x00000008). 
Used to tell how the operation can be + * undone, truncating in the case of append and deleting the file in + * the case of create, and whether enough information was logged so + * that the operation can be redone. + */ +BEGIN_COMPAT write_file 60 86 +DBT name DBT s +DBT dirname DBT s +ARG appname u_int32_t lu +ARG offset_lo u_int32_t lu +ARG offset_hi u_int32_t lu +DBT old_data DBT s +DBT new_data DBT s +ARG flag u_int32_t lu +END + +BEGIN write_file 60p1 86 +DBT name DBT s +DBT dirname DBT s +ARG appname u_int32_t lu +LONGARG offset u_int64_t llu +DBT old_data DBT s +DBT new_data DBT s +ARG flag u_int32_t lu +END + +/* * rename: move a file from one name to another. * The appname value indicates if this is a path name that should be used * directly (i.e., no interpretation) or if it is a pathname that should @@ -105,8 +165,17 @@ DBT fileid DBT s ARG appname u_int32_t lu END -BEGIN rename 48 146 -DUPLICATE rename_noundo 46 150 +BEGIN_COMPAT rename 60 146 +DUPLICATE rename_noundo 60 150 +DBT oldname DBT s +DBT newname DBT s +DBT dirname DBT s +DBT fileid DBT s +ARG appname u_int32_t lu +END + +BEGIN rename 60p1 146 +DUPLICATE rename_noundo 60p1 150 DBT oldname DBT s DBT newname DBT s DBT dirname DBT s @@ -128,7 +197,15 @@ END * child: The transaction that removed or renamed the file. 
*/ */ -BEGIN file_remove 42 141 +BEGIN_COMPAT file_remove 60 141 +DBT real_fid DBT s +DBT tmp_fid DBT s +DBT name DBT s +ARG appname u_int32_t lu +ARG child u_int32_t lx +END + +BEGIN file_remove 60p1 141 DBT real_fid DBT s DBT tmp_fid DBT s DBT name DBT s diff --git a/src/fileops/fileops_auto.c b/src/fileops/fileops_auto.c index 0db619a5..eff1377b 100644 --- a/src/fileops/fileops_auto.c +++ b/src/fileops/fileops_auto.c @@ -14,6 +14,13 @@ DB_LOG_RECSPEC __fop_create_42_desc[] = { {LOGREC_ARG, SSZ(__fop_create_42_args, mode), "mode", "%o"}, {LOGREC_Done, 0, "", ""} }; +DB_LOG_RECSPEC __fop_create_60_desc[] = { + {LOGREC_DBT, SSZ(__fop_create_60_args, name), "name", ""}, + {LOGREC_DBT, SSZ(__fop_create_60_args, dirname), "dirname", ""}, + {LOGREC_ARG, SSZ(__fop_create_60_args, appname), "appname", "%lu"}, + {LOGREC_ARG, SSZ(__fop_create_60_args, mode), "mode", "%o"}, + {LOGREC_Done, 0, "", ""} +}; DB_LOG_RECSPEC __fop_create_desc[] = { {LOGREC_DBT, SSZ(__fop_create_args, name), "name", ""}, {LOGREC_DBT, SSZ(__fop_create_args, dirname), "dirname", ""}, @@ -21,6 +28,12 @@ DB_LOG_RECSPEC __fop_create_desc[] = { {LOGREC_ARG, SSZ(__fop_create_args, mode), "mode", "%o"}, {LOGREC_Done, 0, "", ""} }; +DB_LOG_RECSPEC __fop_remove_60_desc[] = { + {LOGREC_DBT, SSZ(__fop_remove_60_args, name), "name", ""}, + {LOGREC_DBT, SSZ(__fop_remove_60_args, fid), "fid", ""}, + {LOGREC_ARG, SSZ(__fop_remove_60_args, appname), "appname", "%lu"}, + {LOGREC_Done, 0, "", ""} +}; DB_LOG_RECSPEC __fop_remove_desc[] = { {LOGREC_DBT, SSZ(__fop_remove_args, name), "name", ""}, {LOGREC_DBT, SSZ(__fop_remove_args, fid), "fid", ""}, @@ -37,6 +50,17 @@ DB_LOG_RECSPEC __fop_write_42_desc[] = { {LOGREC_ARG, SSZ(__fop_write_42_args, flag), "flag", "%lu"}, {LOGREC_Done, 0, "", ""} }; +DB_LOG_RECSPEC __fop_write_60_desc[] = { + {LOGREC_DBT, SSZ(__fop_write_60_args, name), "name", ""}, + {LOGREC_DBT, SSZ(__fop_write_60_args, dirname), "dirname", ""}, + {LOGREC_ARG, SSZ(__fop_write_60_args, appname), 
"appname", "%lu"}, + {LOGREC_ARG, SSZ(__fop_write_60_args, pgsize), "pgsize", "%lu"}, + {LOGREC_ARG, SSZ(__fop_write_60_args, pageno), "pageno", "%lu"}, + {LOGREC_ARG, SSZ(__fop_write_60_args, offset), "offset", "%lu"}, + {LOGREC_DBT, SSZ(__fop_write_60_args, page), "page", ""}, + {LOGREC_ARG, SSZ(__fop_write_60_args, flag), "flag", "%lu"}, + {LOGREC_Done, 0, "", ""} +}; DB_LOG_RECSPEC __fop_write_desc[] = { {LOGREC_DBT, SSZ(__fop_write_args, name), "name", ""}, {LOGREC_DBT, SSZ(__fop_write_args, dirname), "dirname", ""}, @@ -48,6 +72,27 @@ DB_LOG_RECSPEC __fop_write_desc[] = { {LOGREC_ARG, SSZ(__fop_write_args, flag), "flag", "%lu"}, {LOGREC_Done, 0, "", ""} }; +DB_LOG_RECSPEC __fop_write_file_60_desc[] = { + {LOGREC_DBT, SSZ(__fop_write_file_60_args, name), "name", ""}, + {LOGREC_DBT, SSZ(__fop_write_file_60_args, dirname), "dirname", ""}, + {LOGREC_ARG, SSZ(__fop_write_file_60_args, appname), "appname", "%lu"}, + {LOGREC_ARG, SSZ(__fop_write_file_60_args, offset_lo), "offset_lo", "%lu"}, + {LOGREC_ARG, SSZ(__fop_write_file_60_args, offset_hi), "offset_hi", "%lu"}, + {LOGREC_DBT, SSZ(__fop_write_file_60_args, old_data), "old_data", ""}, + {LOGREC_DBT, SSZ(__fop_write_file_60_args, new_data), "new_data", ""}, + {LOGREC_ARG, SSZ(__fop_write_file_60_args, flag), "flag", "%lu"}, + {LOGREC_Done, 0, "", ""} +}; +DB_LOG_RECSPEC __fop_write_file_desc[] = { + {LOGREC_DBT, SSZ(__fop_write_file_args, name), "name", ""}, + {LOGREC_DBT, SSZ(__fop_write_file_args, dirname), "dirname", ""}, + {LOGREC_ARG, SSZ(__fop_write_file_args, appname), "appname", "%lu"}, + {LOGREC_LONGARG, SSZ(__fop_write_file_args, offset), "offset", ""}, + {LOGREC_DBT, SSZ(__fop_write_file_args, old_data), "old_data", ""}, + {LOGREC_DBT, SSZ(__fop_write_file_args, new_data), "new_data", ""}, + {LOGREC_ARG, SSZ(__fop_write_file_args, flag), "flag", "%lu"}, + {LOGREC_Done, 0, "", ""} +}; DB_LOG_RECSPEC __fop_rename_42_desc[] = { {LOGREC_DBT, SSZ(__fop_rename_42_args, oldname), "oldname", ""}, {LOGREC_DBT, 
SSZ(__fop_rename_42_args, newname), "newname", ""}, @@ -62,6 +107,22 @@ DB_LOG_RECSPEC __fop_rename_noundo_46_desc[] = { {LOGREC_ARG, SSZ(__fop_rename_42_args, appname), "appname", "%lu"}, {LOGREC_Done, 0, "", ""} }; +DB_LOG_RECSPEC __fop_rename_60_desc[] = { + {LOGREC_DBT, SSZ(__fop_rename_60_args, oldname), "oldname", ""}, + {LOGREC_DBT, SSZ(__fop_rename_60_args, newname), "newname", ""}, + {LOGREC_DBT, SSZ(__fop_rename_60_args, dirname), "dirname", ""}, + {LOGREC_DBT, SSZ(__fop_rename_60_args, fileid), "fileid", ""}, + {LOGREC_ARG, SSZ(__fop_rename_60_args, appname), "appname", "%lu"}, + {LOGREC_Done, 0, "", ""} +}; +DB_LOG_RECSPEC __fop_rename_noundo_60_desc[] = { + {LOGREC_DBT, SSZ(__fop_rename_60_args, oldname), "oldname", ""}, + {LOGREC_DBT, SSZ(__fop_rename_60_args, newname), "newname", ""}, + {LOGREC_DBT, SSZ(__fop_rename_60_args, dirname), "dirname", ""}, + {LOGREC_DBT, SSZ(__fop_rename_60_args, fileid), "fileid", ""}, + {LOGREC_ARG, SSZ(__fop_rename_60_args, appname), "appname", "%lu"}, + {LOGREC_Done, 0, "", ""} +}; DB_LOG_RECSPEC __fop_rename_desc[] = { {LOGREC_DBT, SSZ(__fop_rename_args, oldname), "oldname", ""}, {LOGREC_DBT, SSZ(__fop_rename_args, newname), "newname", ""}, @@ -78,6 +139,14 @@ DB_LOG_RECSPEC __fop_rename_noundo_desc[] = { {LOGREC_ARG, SSZ(__fop_rename_args, appname), "appname", "%lu"}, {LOGREC_Done, 0, "", ""} }; +DB_LOG_RECSPEC __fop_file_remove_60_desc[] = { + {LOGREC_DBT, SSZ(__fop_file_remove_60_args, real_fid), "real_fid", ""}, + {LOGREC_DBT, SSZ(__fop_file_remove_60_args, tmp_fid), "tmp_fid", ""}, + {LOGREC_DBT, SSZ(__fop_file_remove_60_args, name), "name", ""}, + {LOGREC_ARG, SSZ(__fop_file_remove_60_args, appname), "appname", "%lu"}, + {LOGREC_ARG, SSZ(__fop_file_remove_60_args, child), "child", "%lx"}, + {LOGREC_Done, 0, "", ""} +}; DB_LOG_RECSPEC __fop_file_remove_desc[] = { {LOGREC_DBT, SSZ(__fop_file_remove_args, real_fid), "real_fid", ""}, {LOGREC_DBT, SSZ(__fop_file_remove_args, tmp_fid), "tmp_fid", ""}, @@ -106,6 +175,9 
@@ __fop_init_recover(env, dtabp) __fop_write_recover, DB___fop_write)) != 0) return (ret); if ((ret = __db_add_recovery_int(env, dtabp, + __fop_write_file_recover, DB___fop_write_file)) != 0) + return (ret); + if ((ret = __db_add_recovery_int(env, dtabp, __fop_rename_recover, DB___fop_rename)) != 0) return (ret); if ((ret = __db_add_recovery_int(env, dtabp, diff --git a/src/fileops/fileops_autop.c b/src/fileops/fileops_autop.c index 6e271a17..784aa1d0 100644 --- a/src/fileops/fileops_autop.c +++ b/src/fileops/fileops_autop.c @@ -27,6 +27,23 @@ __fop_create_42_print(env, dbtp, lsnp, notused2, info) } /* + * PUBLIC: int __fop_create_60_print __P((ENV *, DBT *, DB_LSN *, + * PUBLIC: db_recops, void *)); + */ +int +__fop_create_60_print(env, dbtp, lsnp, notused2, info) + ENV *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops notused2; + void *info; +{ + COMPQUIET(notused2, DB_TXN_PRINT); + + return (__log_print_record(env, dbtp, lsnp, "__fop_create_60", __fop_create_60_desc, info)); +} + +/* * PUBLIC: int __fop_create_print __P((ENV *, DBT *, DB_LSN *, * PUBLIC: db_recops, void *)); */ @@ -44,6 +61,23 @@ __fop_create_print(env, dbtp, lsnp, notused2, info) } /* + * PUBLIC: int __fop_remove_60_print __P((ENV *, DBT *, DB_LSN *, + * PUBLIC: db_recops, void *)); + */ +int +__fop_remove_60_print(env, dbtp, lsnp, notused2, info) + ENV *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops notused2; + void *info; +{ + COMPQUIET(notused2, DB_TXN_PRINT); + + return (__log_print_record(env, dbtp, lsnp, "__fop_remove_60", __fop_remove_60_desc, info)); +} + +/* * PUBLIC: int __fop_remove_print __P((ENV *, DBT *, DB_LSN *, * PUBLIC: db_recops, void *)); */ @@ -78,6 +112,23 @@ __fop_write_42_print(env, dbtp, lsnp, notused2, info) } /* + * PUBLIC: int __fop_write_60_print __P((ENV *, DBT *, DB_LSN *, + * PUBLIC: db_recops, void *)); + */ +int +__fop_write_60_print(env, dbtp, lsnp, notused2, info) + ENV *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops notused2; + void *info; +{ + 
COMPQUIET(notused2, DB_TXN_PRINT); + + return (__log_print_record(env, dbtp, lsnp, "__fop_write_60", __fop_write_60_desc, info)); +} + +/* * PUBLIC: int __fop_write_print __P((ENV *, DBT *, DB_LSN *, * PUBLIC: db_recops, void *)); */ @@ -95,6 +146,40 @@ __fop_write_print(env, dbtp, lsnp, notused2, info) } /* + * PUBLIC: int __fop_write_file_60_print __P((ENV *, DBT *, DB_LSN *, + * PUBLIC: db_recops, void *)); + */ +int +__fop_write_file_60_print(env, dbtp, lsnp, notused2, info) + ENV *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops notused2; + void *info; +{ + COMPQUIET(notused2, DB_TXN_PRINT); + + return (__log_print_record(env, dbtp, lsnp, "__fop_write_file_60", __fop_write_file_60_desc, info)); +} + +/* + * PUBLIC: int __fop_write_file_print __P((ENV *, DBT *, DB_LSN *, + * PUBLIC: db_recops, void *)); + */ +int +__fop_write_file_print(env, dbtp, lsnp, notused2, info) + ENV *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops notused2; + void *info; +{ + COMPQUIET(notused2, DB_TXN_PRINT); + + return (__log_print_record(env, dbtp, lsnp, "__fop_write_file", __fop_write_file_desc, info)); +} + +/* * PUBLIC: int __fop_rename_42_print __P((ENV *, DBT *, DB_LSN *, * PUBLIC: db_recops, void *)); */ @@ -112,6 +197,23 @@ __fop_rename_42_print(env, dbtp, lsnp, notused2, info) } /* + * PUBLIC: int __fop_rename_60_print __P((ENV *, DBT *, DB_LSN *, + * PUBLIC: db_recops, void *)); + */ +int +__fop_rename_60_print(env, dbtp, lsnp, notused2, info) + ENV *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops notused2; + void *info; +{ + COMPQUIET(notused2, DB_TXN_PRINT); + + return (__log_print_record(env, dbtp, lsnp, "__fop_rename_60", __fop_rename_60_desc, info)); +} + +/* * PUBLIC: int __fop_rename_print __P((ENV *, DBT *, DB_LSN *, * PUBLIC: db_recops, void *)); */ @@ -129,6 +231,23 @@ __fop_rename_print(env, dbtp, lsnp, notused2, info) } /* + * PUBLIC: int __fop_file_remove_60_print __P((ENV *, DBT *, + * PUBLIC: DB_LSN *, db_recops, void *)); + */ +int +__fop_file_remove_60_print(env, 
dbtp, lsnp, notused2, info) + ENV *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops notused2; + void *info; +{ + COMPQUIET(notused2, DB_TXN_PRINT); + + return (__log_print_record(env, dbtp, lsnp, "__fop_file_remove_60", __fop_file_remove_60_desc, info)); +} + +/* * PUBLIC: int __fop_file_remove_print __P((ENV *, DBT *, DB_LSN *, * PUBLIC: db_recops, void *)); */ @@ -165,6 +284,9 @@ __fop_init_print(env, dtabp) __fop_write_print, DB___fop_write)) != 0) return (ret); if ((ret = __db_add_recovery_int(env, dtabp, + __fop_write_file_print, DB___fop_write_file)) != 0) + return (ret); + if ((ret = __db_add_recovery_int(env, dtabp, __fop_rename_print, DB___fop_rename)) != 0) return (ret); if ((ret = __db_add_recovery_int(env, dtabp, diff --git a/src/fileops/fop_basic.c b/src/fileops/fop_basic.c index d6c707f2..c1280d76 100644 --- a/src/fileops/fop_basic.c +++ b/src/fileops/fop_basic.c @@ -1,7 +1,7 @@ /*- * See the file LICENSE for redistribution information. * - * Copyright (c) 2001, 2012 Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2001, 2015 Oracle and/or its affiliates. All rights reserved. * * $Id$ */ @@ -253,6 +253,220 @@ err: if (local_open && } /* + * Used to reduce the maximum amount of data that will be logged at a time. + * Large writes are logged as a series of smaller writes to prevent a + * single log from being larger than the log buffer or a log file. + */ +#define LOG_OVERWRITE_MULTIPLIER 0.75 +#define LOG_REDO_MULTIPLIER 0.75 +#define LOG_OVERWRITE_REDO_MULTIPLIER 0.33 + +/* + * __fop_write_file + * + * Write "size" bytes from "buf" to file "name" beginning at offset "off." + * dirname is the directory in which the file is stored, fhp the file + * handle to write too, and flags contains whether this is creating or + * appending data, which changes how the data is logged. + * The other __fop_write is designed for writing pages to databases, this + * function writes generic data to files, usually blob files. 
+ * + * PUBLIC: int __fop_write_file __P((ENV *, DB_TXN *, + * PUBLIC: const char *, const char *, APPNAME, DB_FH *, + * PUBLIC: off_t, void *, size_t, u_int32_t)); + */ +int +__fop_write_file(env, txn, + name, dirname, appname, fhp, off, buf, size, flags) + ENV *env; + DB_TXN *txn; + const char *name, *dirname; + APPNAME appname; + DB_FH *fhp; + off_t off; + void *buf; + size_t size; + u_int32_t flags; +{ + DBT new_data, old_data, namedbt, dirdbt; + DB_LOG *dblp; + DB_LSN lsn; + off_t cur_off; + int local_open, ret, t_ret; + size_t cur_size, nbytes, tmp_size; + u_int32_t lflags, lgbuf_size, lgsize, lgfile_size; + char *real_name; + void *cur_ptr; + + ret = local_open = 0; + real_name = NULL; + lflags = 0; + memset(&new_data, 0, sizeof(new_data)); + memset(&old_data, 0, sizeof(old_data)); + ZERO_LSN(lsn); + + if (fhp == NULL) { + /* File isn't open; we need to reopen it. */ + if ((ret = __db_appname(env, + appname, name, &dirname, &real_name)) != 0) + return (ret); + + if ((ret = __os_open(env, real_name, 0, 0, 0, &fhp)) != 0) + goto err; + local_open = 1; + } + + if (DBENV_LOGGING(env) +#if !defined(DEBUG_WOP) + && txn != NULL +#endif + ) { + DB_INIT_DBT(namedbt, name, strlen(name) + 1); + if (dirname != NULL) + DB_INIT_DBT(dirdbt, dirname, strlen(dirname) + 1); + else + memset(&dirdbt, 0, sizeof(dirdbt)); + /* + * If the write is larger than the log buffer or file size, + * then log it as a set of smaller writes. + */ + cur_off = off; + cur_ptr = buf; + cur_size = size; + dblp = env->lg_handle; + LOG_SYSTEM_LOCK(env); + lgfile_size = ((LOG *)dblp->reginfo.primary)->log_nsize; + LOG_SYSTEM_UNLOCK(env); + if ((ret = __log_get_lg_bsize(env->dbenv, &lgbuf_size)) != 0) + goto err; + + if (lgfile_size > lgbuf_size) + lgsize = lgbuf_size; + else + lgsize = lgfile_size; + + /* + * Partial logging only logs enough data to undo an operation. + */ + if (LF_ISSET(DB_FOP_PARTIAL_LOG)) { + /* No data needs to be logged for append and create. 
*/ + if (LF_ISSET(DB_FOP_APPEND | DB_FOP_CREATE)) { + lflags |= + flags & (DB_FOP_APPEND | DB_FOP_CREATE); + cur_size = 0; + goto log; + } else { + /* + * Writing in the middle of the blob requires + * logging the data being overwritten. + */ + lgsize = (u_int32_t) + (lgsize * LOG_OVERWRITE_MULTIPLIER); + } + } else { + /* Log that the operation can be redone from logs. */ + lflags |= DB_FOP_REDO; + /* Just log the new data for append and create */ + if (LF_ISSET(DB_FOP_APPEND | DB_FOP_CREATE)) { + lgsize = (u_int32_t) + (lgsize * LOG_REDO_MULTIPLIER); + lflags |= flags & + (DB_FOP_APPEND | DB_FOP_CREATE); + } else { + /* + * Writing in the middle of the blob requires + * logging both the old and new data. + */ + lgsize = (u_int32_t) + (lgsize * LOG_OVERWRITE_REDO_MULTIPLIER); + } + } + + while (cur_size > 0) { + new_data.data = cur_ptr; + if (cur_size > lgsize) { + new_data.size = lgsize; + cur_size -= lgsize; + } else { + new_data.size = (u_int32_t)cur_size; + cur_size = 0; + } + cur_ptr = (unsigned char *)cur_ptr + new_data.size; + /* + * If not creating or appending the file, then + * the data being overwritten needs to be read + * in so it can be written back in on abort. + */ + if (!(lflags & (DB_FOP_CREATE | DB_FOP_APPEND))) { + DB_ASSERT(env, old_data.data == NULL || + new_data.size <= old_data.size); + old_data.size = new_data.size; + if (old_data.data == NULL) { + if ((ret = __os_malloc(env, + old_data.size, + &old_data.data)) != 0) + goto err; + } + if ((ret = __os_seek( + env, fhp, 0, 0, cur_off)) != 0) + goto err; + if ((ret = __os_read(env, fhp, old_data.data, + old_data.size, &nbytes)) != 0) + goto err; + } +log: tmp_size = new_data.size; + /* + * No need to log the new data if this operation + * cannot be redone from logs. 
+ */ + if (!(lflags & DB_FOP_REDO)) + memset(&new_data, 0, sizeof(new_data)); + if ((ret = __fop_write_file_log( + env, txn, &lsn, flags, &namedbt, &dirdbt, + (u_int32_t)appname, (u_int64_t)cur_off, + &old_data, &new_data, lflags)) != 0) + goto err; + cur_off += tmp_size; + } + /* + * If not creating, we have to flush the logs so that they + * will be available to undo internal writes and appends in case + * of a crash. + */ + if (!(LF_ISSET(DB_FOP_CREATE)) && + txn != NULL && !F_ISSET(txn, TXN_NOSYNC)) + if ((ret = __log_flush(env, &lsn)) != 0) + goto err; + } + + /* Seek to offset. */ + if ((ret = __os_seek(env, fhp, 0, 0, off)) != 0) + goto err; + + /* Now do the write. */ + if ((ret = __os_write(env, fhp, buf, size, &nbytes)) != 0) + goto err; + + if (nbytes != size) { + __db_errx(env, DB_STR_A("0238", + "Error wrote %lld bytes to file %s instead of %lld .", + "%lld %s %lld"), + (long long)nbytes, name, (long long)size); + goto err; + } + +err: if (local_open && + (t_ret = __os_closehandle(env, fhp)) != 0 && ret == 0) + ret = t_ret; + + if (real_name != NULL) + __os_free(env, real_name); + if (old_data.data != NULL) + __os_free(env, old_data.data); + return (ret); +} + +/* * __fop_rename -- * Change a file's name. * diff --git a/src/fileops/fop_rec.c b/src/fileops/fop_rec.c index 52d6175d..71a81ad6 100644 --- a/src/fileops/fop_rec.c +++ b/src/fileops/fop_rec.c @@ -1,7 +1,7 @@ /*- * See the file LICENSE for redistribution information. * - * Copyright (c) 2001, 2012 Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2001, 2015 Oracle and/or its affiliates. All rights reserved. * * $Id$ */ @@ -9,16 +9,63 @@ #include "db_config.h" #include "db_int.h" +#include "dbinc/blob.h" #include "dbinc/db_page.h" #include "dbinc/fop.h" #include "dbinc/db_am.h" #include "dbinc/mp.h" #include "dbinc/txn.h" +typedef enum { + DB_APP53_NONE=0, /* No type (region). */ + DB_APP53_DATA, /* Data file. */ + DB_APP53_LOG, /* Log file. 
*/ + DB_APP53_META, /* Persistent metadata file. */ + DB_APP53_RECOVER, /* We are in recovery. */ + DB_APP53_TMP /* Temporary file. */ +} APPNAME53; + +static APPNAME __fop_convert_appname __P((ENV *, APPNAME53)); +static int __fop_create_recover_int __P((ENV *, char *, db_recops, int)); static int __fop_rename_recover_int __P((ENV *, DBT *, DB_LSN *, db_recops, void *, int)); +static int __fop_rename_60_recover_int + __P((ENV *, DBT *, DB_LSN *, db_recops, void *, int)); static int __fop_rename_42_recover_int __P((ENV *, DBT *, DB_LSN *, db_recops, void *, int)); +static int __fop_write_file_recover_int + __P((ENV *, db_recops, + APPNAME, u_int32_t, DBT *, DBT *, DBT *, DBT *, off_t, DB_TXN *)); + +/* + * The APPNAME enumeration was changed in 6.0 to include DB_APP_BLOB. APPNAME + * is used by the log records __fop_create, __fop_write, and __fop_rename. + * __fop_write_file also includes an APPNAME field, but that record was created + * in 6.0. + */ +static APPNAME +__fop_convert_appname(env, appname) + ENV *env; + APPNAME53 appname; +{ + switch(appname) + { + case DB_APP53_NONE: + return (DB_APP_NONE); + case DB_APP53_DATA: + return (DB_APP_DATA); + case DB_APP53_LOG: + return (DB_APP_LOG); + case DB_APP53_META: + return (DB_APP_META); + case DB_APP53_RECOVER: + return (DB_APP_RECOVER); + case DB_APP53_TMP: + return (DB_APP_TMP); + } + DB_ASSERT(env, 0); + return (DB_APP_NONE); +} /* * The transactional guarantees Berkeley DB provides for file @@ -50,6 +97,85 @@ static int __fop_rename_42_recover_int * it does not apply. 
*/ +static int +__fop_create_recover_int(env, real_name, op, mode) + ENV *env; + char *real_name; + db_recops op; + int mode; +{ + DB_FH *fhp; + DBMETA *meta; + u_int8_t mbuf[DBMETASIZE]; + int ret; + char *path; +#ifdef HAVE_REPLICATION + DELAYED_BLOB_LIST *dbl; + int view_partial; + + dbl = NULL; +#endif + meta = (DBMETA *)mbuf; + ret = 0; + + if (DB_UNDO(op)) { + /* + * If the file was opened in mpool, we must mark it as + * dead via nameop which will also unlink the file. + */ + if (__os_open(env, real_name, 0, 0, 0, &fhp) == 0) { + if (__fop_read_meta(env, + real_name, mbuf, DBMETASIZE, fhp, 1, NULL) == 0 && + __db_chk_meta(env, NULL, meta, DB_CHK_META) == 0) { + if ((ret = __memp_nameop(env, + meta->uid, NULL, real_name, NULL, 0)) != 0) + goto out; + } else { + (void)__os_closehandle(env, fhp); + goto do_unlink; + } + (void)__os_closehandle(env, fhp); + } else +do_unlink: (void)__os_unlink(env, real_name, 0); + } else if (DB_REDO(op)) { + path = real_name; +#ifdef DB_WIN32 + /* + * Absolute paths on windows can result in it creating a + * "C" or "D" directory in the working directory. + */ + if (__os_abspath(real_name)) + path += 2; +#endif + +#ifdef HAVE_REPLICATION + /* + * Prevent replication of blob files if their owning database + * is not replicated. + */ + if (IS_VIEW_SITE(env) && IS_BLOB_FILE(path)) { + if ((ret = __rep_call_partial(env, + path, &view_partial, 0, &dbl)) != 0) + goto out; + DB_ASSERT(env, dbl == NULL); + if (view_partial == 0) + goto out; + } +#endif + /* Blob directories might not exist yet. */ + if (__os_exists(env, real_name, NULL) != 0 && + (ret = __db_mkpath(env, path)) != 0) + goto out; + + if ((ret = __os_open(env, real_name, + 0, DB_OSO_CREATE, mode, &fhp)) == 0) + (void)__os_closehandle(env, fhp); + else + goto out; + } +out: return (ret); +} + /* * __fop_create_recover -- * Recovery function for create. 
@@ -66,9 +192,6 @@ __fop_create_recover(env, dbtp, lsnp, op, info) void *info; { __fop_create_args *argp; - DB_FH *fhp; - DBMETA *meta; - u_int8_t mbuf[DBMETASIZE]; int ret; char *real_name; const char *dirname; @@ -78,7 +201,6 @@ __fop_create_recover(env, dbtp, lsnp, op, info) real_name = NULL; REC_PRINT(__fop_create_print); REC_NOOP_INTRO(__fop_create_read); - meta = (DBMETA *)mbuf; if (argp->dirname.size == 0) dirname = NULL; @@ -90,32 +212,60 @@ __fop_create_recover(env, dbtp, lsnp, op, info) (const char *)argp->name.data, &dirname, &real_name)) != 0) goto out; - if (DB_UNDO(op)) { - /* - * If the file was opened in mpool, we must mark it as - * dead via nameop which will also unlink the file. - */ - if (__os_open(env, real_name, 0, 0, 0, &fhp) == 0) { - if (__fop_read_meta(env, - real_name, mbuf, DBMETASIZE, fhp, 1, NULL) == 0 && - __db_chk_meta(env, NULL, meta, 1) == 0) { - if ((ret = __memp_nameop(env, - meta->uid, NULL, real_name, NULL, 0)) != 0) - goto out; - } else { - (void)__os_closehandle(env, fhp); - goto do_unlink; - } - (void)__os_closehandle(env, fhp); - } else -do_unlink: (void)__os_unlink(env, real_name, 0); - } else if (DB_REDO(op)) { - if ((ret = __os_open(env, real_name, 0, - DB_OSO_CREATE, (int)argp->mode, &fhp)) == 0) - (void)__os_closehandle(env, fhp); - else - goto out; - } + if ((ret = __fop_create_recover_int( + env, real_name, op, (int)argp->mode)) != 0) + goto out; + + *lsnp = argp->prev_lsn; + +out: if (real_name != NULL) + __os_free(env, real_name); + + REC_NOOP_CLOSE; +} + +/* + * __fop_create_60_recover -- + * Recovery function for create. 
+ * + * PUBLIC: int __fop_create_60_recover + * PUBLIC: __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); + */ +int +__fop_create_60_recover(env, dbtp, lsnp, op, info) + ENV *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops op; + void *info; +{ + __fop_create_60_args *argp; + APPNAME appname; + int ret; + char *real_name; + const char *dirname; + + COMPQUIET(info, NULL); + + real_name = NULL; + REC_PRINT(__fop_create_60_print); + REC_NOOP_INTRO(__fop_create_60_read); + + if (argp->dirname.size == 0) + dirname = NULL; + else + dirname = (const char *)argp->dirname.data; + + appname = __fop_convert_appname(env, (APPNAME53)argp->appname); + + if ((ret = __db_appname(env, + appname == DB_APP_DATA ? DB_APP_RECOVER : appname, + (const char *)argp->name.data, &dirname, &real_name)) != 0) + goto out; + + if ((ret = __fop_create_recover_int( + env, real_name, op, (int)argp->mode)) != 0) + goto out; *lsnp = argp->prev_lsn; @@ -144,6 +294,7 @@ __fop_create_42_recover(env, dbtp, lsnp, op, info) DB_FH *fhp; DBMETA *meta; u_int8_t mbuf[DBMETASIZE]; + APPNAME appname; int ret; char *real_name; @@ -153,8 +304,9 @@ __fop_create_42_recover(env, dbtp, lsnp, op, info) REC_PRINT(__fop_create_print); REC_NOOP_INTRO(__fop_create_read); meta = (DBMETA *)mbuf; + appname = __fop_convert_appname(env, (APPNAME53)argp->appname); - if ((ret = __db_appname(env, (APPNAME)argp->appname, + if ((ret = __db_appname(env, appname, (const char *)argp->name.data, NULL, &real_name)) != 0) goto out; @@ -166,7 +318,7 @@ __fop_create_42_recover(env, dbtp, lsnp, op, info) if (__os_open(env, real_name, 0, 0, 0, &fhp) == 0) { if (__fop_read_meta(env, real_name, mbuf, DBMETASIZE, fhp, 1, NULL) == 0 && - __db_chk_meta(env, NULL, meta, 1) == 0) { + __db_chk_meta(env, NULL, meta, DB_CHK_META) == 0) { if ((ret = __memp_nameop(env, meta->uid, NULL, real_name, NULL, 0)) != 0) goto out; @@ -232,6 +384,49 @@ out: if (real_name != NULL) } /* + * __fop_remove_60_recover -- + * Recovery function for remove. 
+ * + * PUBLIC: int __fop_remove_60_recover + * PUBLIC: __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); + */ +int +__fop_remove_60_recover(env, dbtp, lsnp, op, info) + ENV *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops op; + void *info; +{ + __fop_remove_60_args *argp; + APPNAME appname; + int ret; + char *real_name; + + COMPQUIET(info, NULL); + + real_name = NULL; + REC_PRINT(__fop_remove_60_print); + REC_NOOP_INTRO(__fop_remove_60_read); + + appname = __fop_convert_appname(env, (APPNAME53)argp->appname); + + if ((ret = __db_appname(env, appname, + (const char *)argp->name.data, NULL, &real_name)) != 0) + goto out; + + /* Its ok if the file is not there. */ + if (DB_REDO(op)) + (void)__memp_nameop(env, + (u_int8_t *)argp->fid.data, NULL, real_name, NULL, 0); + + *lsnp = argp->prev_lsn; +out: if (real_name != NULL) + __os_free(env, real_name); + REC_NOOP_CLOSE; +} + +/* * __fop_write_recover -- * Recovery function for writechunk. * @@ -251,6 +446,15 @@ __fop_write_recover(env, dbtp, lsnp, op, info) COMPQUIET(info, NULL); +#ifndef HAVE_64BIT_TYPES + COMPQUIET(dbtp, NULL); + COMPQUIET(lsnp, NULL); + COMPQUIET(op, 0); + __db_errx(env, DB_STR("0243", + "Blobs require 64 integer compiler support.")); + return (DB_OPNOTSUP); +#endif + REC_PRINT(__fop_write_print); REC_NOOP_INTRO(__fop_write_read); @@ -272,6 +476,48 @@ __fop_write_recover(env, dbtp, lsnp, op, info) } /* + * __fop_write_60_recover -- + * Recovery function for writechunk. 
+ * + * PUBLIC: int __fop_write_60_recover + * PUBLIC: __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); + */ +int +__fop_write_60_recover(env, dbtp, lsnp, op, info) + ENV *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops op; + void *info; +{ + __fop_write_60_args *argp; + APPNAME appname; + int ret; + + COMPQUIET(info, NULL); + + REC_PRINT(__fop_write_60_print); + REC_NOOP_INTRO(__fop_write_60_read); + + ret = 0; + if (DB_UNDO(op)) + DB_ASSERT(env, argp->flag != 0); + else if (DB_REDO(op)) { + appname = __fop_convert_appname(env, (APPNAME53)argp->appname); + ret = __fop_write(env, + argp->txnp, argp->name.data, + argp->dirname.size == 0 ? NULL : argp->dirname.data, + appname == DB_APP_DATA ? DB_APP_RECOVER : appname, + NULL, argp->pgsize, argp->pageno, argp->offset, + argp->page.data, argp->page.size, argp->flag, 0); + } + + if (ret == 0) + *lsnp = argp->prev_lsn; + REC_NOOP_CLOSE; +} + +/* * __fop_write_42_recover -- * Recovery function for writechunk. * @@ -287,6 +533,7 @@ __fop_write_42_recover(env, dbtp, lsnp, op, info) void *info; { __fop_write_args *argp; + APPNAME appname; int ret; COMPQUIET(info, NULL); @@ -297,18 +544,194 @@ __fop_write_42_recover(env, dbtp, lsnp, op, info) ret = 0; if (DB_UNDO(op)) DB_ASSERT(env, argp->flag != 0); - else if (DB_REDO(op)) + else if (DB_REDO(op)) { + appname = __fop_convert_appname(env, (APPNAME53)argp->appname); ret = __fop_write(env, - argp->txnp, argp->name.data, NULL, (APPNAME)argp->appname, + argp->txnp, argp->name.data, NULL, appname, NULL, argp->pgsize, argp->pageno, argp->offset, argp->page.data, argp->page.size, argp->flag, 0); + } + + if (ret == 0) + *lsnp = argp->prev_lsn; + REC_NOOP_CLOSE; +} + +static int +__fop_write_file_recover_int( + env, op, appname, flag, dirname, name, new_data, old_data, offset, txn) + ENV *env; + db_recops op; + APPNAME appname; + u_int32_t flag; + DBT *dirname; + DBT *name; + DBT *new_data; + DBT *old_data; + off_t offset; + DB_TXN *txn; +{ + DB_FH *fhp; + int ret; + size_t nbytes; + 
char *path; + + fhp = NULL; + path = NULL; + ret = 0; + + if (DB_UNDO(op)) { + if (flag & DB_FOP_CREATE) { + /* + * File was created in this transaction. Do nothing, + * destroying the file will undo the write. + */ + } else { + if ((ret = __db_appname(env, + appname == DB_APP_DATA ? DB_APP_RECOVER : + appname, name->data, NULL, &path)) != 0) + goto end; + + if (__os_open(env, path, 0, 0, DB_MODE_600, &fhp) != 0) + goto end; + + if (flag & DB_FOP_APPEND) { + /* + * Appended to the end of the file, undo by + * truncating the file. + */ + (void)__os_truncate(env, fhp, 0, 0, offset); + } else { + /* + * Data overwritten in the middle of the file, + * undo by writing back in the old data. + */ + + /* Seek to offset. */ + if ((__os_seek(env, fhp, 0, 0, offset)) != 0) + goto end; + + /* Now do the write. */ + ret = __os_write(env, fhp, + old_data->data, old_data->size, &nbytes); + } + } + } else if (DB_REDO(op)) { + /* + * Not all operations log enough data to be redone. Since + * files are flushed before the transaction commit this is + * not an issue, unless we are on an HA client or initializing + * from a backup. + */ + if (flag & DB_FOP_REDO) { + ret = __fop_write_file(env, txn, name->data, + dirname->size == 0 ? NULL : dirname->data, + appname == DB_APP_DATA ? DB_APP_RECOVER : appname, + NULL, offset, new_data->data, new_data->size, 0); +#ifdef HAVE_REPLICATION + /* + * Blob files of databases that are not replicated are + * also not replicated. So assume any ENOENT errors + * are because the file was not replicated. + */ + if (ret == ENOENT && IS_VIEW_SITE(env)) + ret = 0; +#endif + } else { + /* DB_ASSERT(env, !IS_REP_CLIENT(env)); */ + } + } + +end: if (path != NULL) + __os_free(env, path); + if (fhp != NULL) + (void)__os_closehandle(env, fhp); + return (ret); +} +/* + * __fop_write_file_recover -- + * Recovery function for writing to a blob file. 
Files are flushed before + * the transaction is committed, so often the file operations do not need + * to be redone or undone. However, since no lsn is stored in the file, + * we always try to redo or undo the operation, since it will not change + * the final state of the file if the operation is not needed. This also + * means that this function has to be very tolerant of errors, such as + * trying to open a file that was deleted, or truncate a file that is + * already short. + * + * PUBLIC: int __fop_write_file_recover + * PUBLIC: __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); + */ +int +__fop_write_file_recover(env, dbtp, lsnp, op, info) + ENV *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops op; + void *info; +{ + __fop_write_file_args *argp; + int ret; + COMPQUIET(info, NULL); + +#ifndef HAVE_64BIT_TYPES + COMPQUIET(dbtp, NULL); + COMPQUIET(lsnp, NULL); + COMPQUIET(op, 0); + __db_errx(env, DB_STR("0244", + "Blobs require 64 integer compiler support.")); + return (DB_OPNOTSUP); +#endif + + REC_PRINT(__fop_write_file_print); + REC_NOOP_INTRO(__fop_write_file_read); + + ret = __fop_write_file_recover_int(env, op, + (APPNAME)argp->appname, argp->flag, &argp->dirname, &argp->name, + &argp->new_data, &argp->old_data, (off_t)argp->offset, argp->txnp); if (ret == 0) *lsnp = argp->prev_lsn; REC_NOOP_CLOSE; } /* + * __fop_write_file_60_recover -- + * + * PUBLIC: int __fop_write_file_60_recover + * PUBLIC: __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); + */ +int +__fop_write_file_60_recover(env, dbtp, lsnp, op, info) + ENV *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops op; + void *info; +{ + __fop_write_file_60_args *argp; + off_t offset; + int ret; + COMPQUIET(info, NULL); + + REC_PRINT(__fop_write_file_60_print); + REC_NOOP_INTRO(__fop_write_file_60_read); + + /* The offset is stored as two u_int32_t values. 
*/ + GET_LO_HI(env, argp->offset_lo, argp->offset_hi, offset, ret); + if (ret != 0) + goto end; + + ret = __fop_write_file_recover_int(env, op, + (APPNAME)argp->appname, argp->flag, &argp->dirname, &argp->name, + &argp->new_data, &argp->old_data, offset, argp->txnp); + +end: if (ret == 0) + *lsnp = argp->prev_lsn; + REC_NOOP_CLOSE; +} + +/* * __fop_rename_recover -- * Recovery functions for rename. There are two variants that * both use the same utility function. Had we known about this on day @@ -408,7 +831,148 @@ __fop_rename_recover_int(env, dbtp, lsnp, op, info, undo) if (__fop_read_meta(env, src, mbuf, DBMETASIZE, fhp, 1, NULL) != 0) goto done; - if (__db_chk_meta(env, NULL, meta, 1) != 0) + if (__db_chk_meta(env, NULL, meta, DB_CHK_META) != 0) + goto done; + if (memcmp(argp->fileid.data, meta->uid, DB_FILE_ID_LEN) != 0) + goto done; + (void)__os_closehandle(env, fhp); + fhp = NULL; + if (DB_REDO(op)) { + /* + * Check to see if the target file exists. If it + * does and it does not have the proper id then + * it is a later version. We just remove the source + * file since the state of the world is beyond this + * point. 
+ */ + if (__os_open(env, real_new, 0, 0, 0, &fhp) == 0 && + __fop_read_meta(env, src, mbuf, + DBMETASIZE, fhp, 1, NULL) == 0 && + __db_chk_meta(env, NULL, meta, DB_CHK_META) == 0 && + memcmp(argp->fileid.data, + meta->uid, DB_FILE_ID_LEN) != 0) { + (void)__memp_nameop(env, + fileid, NULL, real_old, NULL, 0); + goto done; + } + } + } + + if (undo && DB_UNDO(op)) + (void)__memp_nameop(env, fileid, + (const char *)argp->oldname.data, real_new, real_old, 0); + if (DB_REDO(op)) + (void)__memp_nameop(env, fileid, + (const char *)argp->newname.data, real_old, real_new, 0); + +done: *lsnp = argp->prev_lsn; +out: if (real_new != NULL) + __os_free(env, real_new); + if (real_old != NULL) + __os_free(env, real_old); + if (fhp != NULL) + (void)__os_closehandle(env, fhp); + + REC_NOOP_CLOSE; +} + +/* + * __fop_rename_60_recover -- + * + * PUBLIC: int __fop_rename_60_recover + * PUBLIC: __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); + * + * PUBLIC: int __fop_rename_noundo_60_recover + * PUBLIC: __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); + */ + +int +__fop_rename_60_recover(env, dbtp, lsnp, op, info) + ENV *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops op; + void *info; +{ + return (__fop_rename_60_recover_int(env, dbtp, lsnp, op, info, 1)); +} + +int +__fop_rename_noundo_60_recover(env, dbtp, lsnp, op, info) + ENV *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops op; + void *info; +{ + return (__fop_rename_60_recover_int(env, dbtp, lsnp, op, info, 0)); +} + +static int +__fop_rename_60_recover_int(env, dbtp, lsnp, op, info, undo) + ENV *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops op; + void *info; + int undo; +{ + __fop_rename_60_args *argp; + APPNAME appname; + DB_FH *fhp; + DBMETA *meta; + u_int8_t *fileid, mbuf[DBMETASIZE]; + int ret; + char *real_new, *real_old, *src; + const char *dirname; + + COMPQUIET(info, NULL); + + fhp = NULL; + meta = (DBMETA *)&mbuf[0]; + ret = 0; + real_new = real_old = NULL; + + REC_PRINT(__fop_rename_60_print); + 
REC_NOOP_INTRO(__fop_rename_60_read); + fileid = argp->fileid.data; + + if (argp->dirname.size == 0) + dirname = NULL; + else + dirname = (const char *)argp->dirname.data; + + + appname = __fop_convert_appname(env, (APPNAME53)argp->appname); + if (appname == DB_APP_DATA) + appname = DB_APP_RECOVER; + + if ((ret = __db_appname(env, appname, (const char *)argp->newname.data, + &dirname, &real_new)) != 0) + goto out; + if ((ret = __db_appname(env, appname, (const char *)argp->oldname.data, + &dirname, &real_old)) != 0) + goto out; + + /* + * Verify that we are manipulating the correct file. We should always + * be OK on an ABORT or an APPLY, but during recovery, we have to + * check. + */ + if (op != DB_TXN_ABORT && op != DB_TXN_APPLY) { + src = DB_UNDO(op) ? real_new : real_old; + /* + * Interpret any error as meaning that the file either doesn't + * exist, doesn't have a meta-data page, or is in some other + * way, shape or form, incorrect, so that we should not restore + * it. + */ + if (__os_open(env, src, 0, 0, 0, &fhp) != 0) + goto done; + if (__fop_read_meta(env, + src, mbuf, DBMETASIZE, fhp, 1, NULL) != 0) + goto done; + if (__db_chk_meta(env, NULL, meta, DB_CHK_META) != 0) goto done; if (memcmp(argp->fileid.data, meta->uid, DB_FILE_ID_LEN) != 0) goto done; @@ -425,7 +989,7 @@ __fop_rename_recover_int(env, dbtp, lsnp, op, info, undo) if (__os_open(env, real_new, 0, 0, 0, &fhp) == 0 && __fop_read_meta(env, src, mbuf, DBMETASIZE, fhp, 1, NULL) == 0 && - __db_chk_meta(env, NULL, meta, 1) == 0 && + __db_chk_meta(env, NULL, meta, DB_CHK_META) == 0 && memcmp(argp->fileid.data, meta->uid, DB_FILE_ID_LEN) != 0) { (void)__memp_nameop(env, @@ -501,6 +1065,7 @@ __fop_rename_42_recover_int(env, dbtp, lsnp, op, info, undo) DB_FH *fhp; DBMETA *meta; u_int8_t *fileid, mbuf[DBMETASIZE]; + APPNAME appname; int ret; char *real_new, *real_old, *src; @@ -515,10 +1080,11 @@ __fop_rename_42_recover_int(env, dbtp, lsnp, op, info, undo) REC_NOOP_INTRO(__fop_rename_read); fileid = 
argp->fileid.data; - if ((ret = __db_appname(env, (APPNAME)argp->appname, + appname = __fop_convert_appname(env, (APPNAME53)argp->appname); + if ((ret = __db_appname(env, appname, (const char *)argp->newname.data, NULL, &real_new)) != 0) goto out; - if ((ret = __db_appname(env, (APPNAME)argp->appname, + if ((ret = __db_appname(env, appname, (const char *)argp->oldname.data, NULL, &real_old)) != 0) goto out; @@ -540,7 +1106,7 @@ __fop_rename_42_recover_int(env, dbtp, lsnp, op, info, undo) if (__fop_read_meta(env, src, mbuf, DBMETASIZE, fhp, 1, NULL) != 0) goto done; - if (__db_chk_meta(env, NULL, meta, 1) != 0) + if (__db_chk_meta(env, NULL, meta, DB_CHK_META) != 0) goto done; if (memcmp(argp->fileid.data, meta->uid, DB_FILE_ID_LEN) != 0) goto done; @@ -557,7 +1123,7 @@ __fop_rename_42_recover_int(env, dbtp, lsnp, op, info, undo) if (__os_open(env, real_new, 0, 0, 0, &fhp) == 0 && __fop_read_meta(env, src, mbuf, DBMETASIZE, fhp, 1, NULL) == 0 && - __db_chk_meta(env, NULL, meta, 1) == 0 && + __db_chk_meta(env, NULL, meta, DB_CHK_META) == 0 && memcmp(argp->fileid.data, meta->uid, DB_FILE_ID_LEN) != 0) { (void)__memp_nameop(env, @@ -652,7 +1218,115 @@ __fop_file_remove_recover(env, dbtp, lsnp, op, info) * We can ignore errors here since we'll simply fail the * checks below and assume this is the wrong file. */ - (void)__db_chk_meta(env, NULL, meta, 1); + (void)__db_chk_meta(env, NULL, meta, DB_CHK_META); + is_real = + memcmp(argp->real_fid.data, meta->uid, DB_FILE_ID_LEN) == 0; + is_tmp = + memcmp(argp->tmp_fid.data, meta->uid, DB_FILE_ID_LEN) == 0; + + if (!is_real && !is_tmp) + /* File exists, but isn't what we were removing. */ + cstat = TXN_IGNORE; + else + /* File exists and is the one that we were removing. */ + cstat = TXN_COMMIT; + } + if (fhp != NULL) { + (void)__os_closehandle(env, fhp); + fhp = NULL; + } + + if (DB_UNDO(op)) { + /* On the backward pass, we leave a note for the child txn. 
*/ + if ((ret = __db_txnlist_update(env, + info, argp->child, cstat, NULL, &ret_stat, 1)) != 0) + goto out; + } else if (DB_REDO(op)) { + /* + * On the forward pass, check if someone recreated the + * file while we weren't looking. + */ + if (cstat == TXN_COMMIT) + (void)__memp_nameop(env, + is_real ? argp->real_fid.data : argp->tmp_fid.data, + NULL, real_name, NULL, 0); + } + +done: *lsnp = argp->prev_lsn; + ret = 0; + +out: if (real_name != NULL) + __os_free(env, real_name); + if (fhp != NULL) + (void)__os_closehandle(env, fhp); + REC_NOOP_CLOSE; +} + +/* + * __fop_file_remove_60_recover -- + * + * PUBLIC: int __fop_file_remove_60_recover + * PUBLIC: __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); + */ +int +__fop_file_remove_60_recover(env, dbtp, lsnp, op, info) + ENV *env; + DBT *dbtp; + DB_LSN *lsnp; + db_recops op; + void *info; +{ + __fop_file_remove_60_args *argp; + DBMETA *meta; + DB_FH *fhp; + size_t len; + u_int8_t mbuf[DBMETASIZE]; + u_int32_t cstat, ret_stat; + APPNAME appname; + int is_real, is_tmp, ret; + char *real_name; + + fhp = NULL; + meta = (DBMETA *)&mbuf[0]; + is_real = is_tmp = 0; + real_name = NULL; + REC_PRINT(__fop_file_remove_60_print); + REC_NOOP_INTRO(__fop_file_remove_60_read); + + /* + * This record is only interesting on the backward, forward, and + * apply phases. + */ + if (op != DB_TXN_BACKWARD_ROLL && + op != DB_TXN_FORWARD_ROLL && op != DB_TXN_APPLY) + goto done; + + appname = __fop_convert_appname(env, (APPNAME53)argp->appname); + if ((ret = __db_appname(env, appname, + argp->name.data, NULL, &real_name)) != 0) + goto out; + + /* Verify that we are manipulating the correct file. */ + len = 0; + if (__os_open(env, real_name, 0, 0, 0, &fhp) != 0 || + (ret = __fop_read_meta(env, real_name, + mbuf, DBMETASIZE, fhp, 1, &len)) != 0) { + /* + * If len is non-zero, then the file exists and has something + * in it, but that something isn't a full meta-data page, so + * this is very bad. Bail out! 
+ */ + if (len != 0) + goto out; + + /* File does not exist. */ + cstat = TXN_EXPECTED; + } else { + /* + * We can ignore errors here since we'll simply fail the + * checks below and assume this is the wrong file. + */ + (void)__db_chk_meta(env, NULL, meta, DB_CHK_META); is_real = memcmp(argp->real_fid.data, meta->uid, DB_FILE_ID_LEN) == 0; is_tmp = @@ -695,3 +1369,4 @@ out: if (real_name != NULL) (void)__os_closehandle(env, fhp); REC_NOOP_CLOSE; } + diff --git a/src/fileops/fop_util.c b/src/fileops/fop_util.c index 1925ffd1..d51aba0f 100644 --- a/src/fileops/fop_util.c +++ b/src/fileops/fop_util.c @@ -1,7 +1,7 @@ /*- * See the file LICENSE for redistribution information. * - * Copyright (c) 2001, 2012 Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2001, 2015 Oracle and/or its affiliates. All rights reserved. * * $Id$ */ @@ -24,9 +24,10 @@ static int __fop_inmem_read_meta __P((DB *, DB_TXN *, const char *, u_int32_t, u_int32_t)); static int __fop_inmem_swap __P((DB *, DB *, DB_TXN *, const char *, const char *, const char *, DB_LOCKER *)); -static int __fop_ondisk_dummy __P((DB *, DB_TXN *, const char *, u_int8_t *)); +static int __fop_ondisk_dummy __P(( + DB *, DB_TXN *, const char *, u_int8_t *, APPNAME)); static int __fop_ondisk_swap __P((DB *, DB *, DB_TXN *, - const char *, const char *, const char *, DB_LOCKER *)); + const char *, const char *, const char *, DB_LOCKER *, APPNAME)); /* * Acquire the environment meta-data lock. The parameters are the @@ -115,7 +116,7 @@ __fop_lock_handle(env, dbp, locker, mode, elockp, flags) /* * If we are in recovery, the only locking we should be * doing is on the global environment. The one exception - * is if we are opening an exclusive database on a client + * is if we are opening an exclusive database on a client * syncing with the master. 
*/ if (IS_RECOVERING(env) && !F2_ISSET(dbp, DB2_AM_INTEXCL)) @@ -234,8 +235,8 @@ __fop_file_setup(dbp, ip, txn, name, mode, flags, retidp) real_name = real_tmpname = tmpname = NULL; dflags = F_ISSET(dbp, DB_AM_NOT_DURABLE) ? DB_LOG_NOT_DURABLE : 0; aflags = LF_ISSET(DB_INTERNAL_PERSISTENT_DB) ? DB_APP_META : - (LF_ISSET(DB_INTERNAL_TEMPORARY_DB) ? DB_APP_NONE : DB_APP_DATA); - LF_CLR(DB_INTERNAL_PERSISTENT_DB | DB_INTERNAL_TEMPORARY_DB); + (LF_ISSET(DB_INTERNAL_BLOB_DB) ? DB_APP_BLOB : + (LF_ISSET(DB_INTERNAL_TEMPORARY_DB) ? DB_APP_NONE : DB_APP_DATA)); ret = 0; retries = 0; @@ -394,14 +395,14 @@ reopen: if (!F_ISSET(dbp, DB_AM_INMEM) && (ret = goto done; } - /* + /* * Case 4: This is a valid file. Now check the - * checksum and decrypt the file so the file + * checksum and decrypt the file so the file * id can be obtained for the handle lock. Note that * the checksum can fail if the database is being * written (possible because the handle lock has * not been obtained yet). So on checksum fail retry - * until the checksum succeeds or the number of + * until the checksum succeeds or the number of * retries is exhausted, then throw an error. */ if (ret == 0 && (ret = __db_chk_meta(env, dbp, @@ -410,7 +411,7 @@ reopen: if (!F_ISSET(dbp, DB_AM_INMEM) && (ret = ret = t_ret; goto err; } - /* + /* * Retry unless the number of retries is * exhausted. */ @@ -423,8 +424,7 @@ reopen: if (!F_ISSET(dbp, DB_AM_INMEM) && (ret = ret = EINVAL; goto err; } - if ((ret = __os_closehandle(env, fhp)) != 0) - goto err; + CLOSE_HANDLE(dbp, fhp); goto retry; } /* Get the file id for the handle lock. */ @@ -464,11 +464,8 @@ reopen: if (!F_ISSET(dbp, DB_AM_INMEM) && (ret = * any application level FCNTL semantics. 
*/ DB_ASSERT(env, !LF_ISSET(DB_FCNTL_LOCKING)); - if (!F_ISSET(dbp, DB_AM_INMEM)) { - if ((ret = __os_closehandle(env, fhp)) != 0) - goto err; - fhp = NULL; - } + if (!F_ISSET(dbp, DB_AM_INMEM)) + CLOSE_HANDLE(dbp, fhp); if ((ret = __fop_lock_handle(env, dbp, locker, lockmode, &elock, 0)) != 0) { if (F_ISSET(dbp, DB_AM_INMEM)) @@ -495,7 +492,7 @@ reopen: if (!F_ISSET(dbp, DB_AM_INMEM) && (ret = } - /* + /* * If we got here, then we have the handle lock, it is now * safe to check the rest of the meta data, since the file * will not be deleted out from under the handle. @@ -505,7 +502,7 @@ reopen: if (!F_ISSET(dbp, DB_AM_INMEM) && (ret = dbp, txn, name, flags, DB_SKIP_CHK)) != 0) goto err; } else { - if ((ret = __db_meta_setup(env, dbp, real_name, + if ((ret = __db_meta_setup(env, dbp, real_name, (DBMETA *)mbuf, flags, DB_SKIP_CHK)) != 0) goto err; } @@ -524,9 +521,8 @@ reopen: if (!F_ISSET(dbp, DB_AM_INMEM) && (ret = if (create_ok) { if (F_ISSET(dbp, DB_AM_INMEM)) { RESET_MPF(dbp, DB_MPOOL_DISCARD); - } else if ((ret = - __os_closehandle(env, fhp)) != 0) - goto err; + } else + CLOSE_HANDLE(dbp, fhp); LF_SET(DB_CREATE); goto create; } else { @@ -856,6 +852,7 @@ retry: if ((ret = __db_master_open(dbp, /* Copy the pagesize and set the sub-database flag. */ dbp->pgsize = mdbp->pgsize; F_SET(dbp, DB_AM_SUBDB); + dbp->blob_file_id = mdbp->blob_file_id; if (name != NULL && (ret = __db_master_update(mdbp, dbp, ip, txn, name, dbp->type, MU_OPEN, NULL, flags)) != 0) { @@ -881,6 +878,8 @@ retry: if ((ret = __db_master_open(dbp, DB_TEST_RECOVERY(dbp, DB_TEST_POSTLOG, ret, mname); + dbp->dirname = mdbp->dirname; + /* * We copy our fileid from our master so that we all open * the same file in mpool. We'll use the meta-pgno to lock @@ -1174,13 +1173,14 @@ err: * remove). 
* * PUBLIC: int __fop_dummy __P((DB *, - * PUBLIC: DB_TXN *, const char *, const char *)); + * PUBLIC: DB_TXN *, const char *, const char *, APPNAME)); */ int -__fop_dummy(dbp, txn, old, new) +__fop_dummy(dbp, txn, old, new, appname) DB *dbp; DB_TXN *txn; const char *old, *new; + APPNAME appname; { DB *tmpdbp; DB_TXN *stxn; @@ -1214,17 +1214,19 @@ __fop_dummy(dbp, txn, old, new) if (F_ISSET(dbp, DB_AM_NOT_DURABLE) && (ret = __db_set_flags(tmpdbp, DB_TXN_NOT_DURABLE)) != 0) goto err; + tmpdbp->dirname = dbp->dirname; memset(mbuf, 0, sizeof(mbuf)); ret = F_ISSET(dbp, DB_AM_INMEM) ? __fop_inmem_dummy(tmpdbp, stxn, back, mbuf) : - __fop_ondisk_dummy(tmpdbp, stxn, back, mbuf); + __fop_ondisk_dummy(tmpdbp, stxn, back, mbuf, appname); if (ret != 0) goto err; ret = F_ISSET(dbp, DB_AM_INMEM) ? __fop_inmem_swap(dbp, tmpdbp, stxn, old, new, back, txn->locker) : - __fop_ondisk_swap(dbp, tmpdbp, stxn, old, new, back, txn->locker); + __fop_ondisk_swap( + dbp, tmpdbp, stxn, old, new, back, txn->locker, appname); stxn = NULL; if (ret != 0) goto err; @@ -1246,12 +1248,13 @@ err: if (stxn != NULL) * and the subsequent calls in __db_rename do the work for the * transactional case). * - * PUBLIC: int __fop_dbrename __P((DB *, const char *, const char *)); + * PUBLIC: int __fop_dbrename __P((DB *, const char *, const char *, APPNAME)); */ int -__fop_dbrename(dbp, old, new) +__fop_dbrename(dbp, old, new, appname) DB *dbp; const char *old, *new; + APPNAME appname; { DB_LOCK elock; ENV *env; @@ -1269,11 +1272,11 @@ __fop_dbrename(dbp, old, new) } else { /* Get full names. 
*/ if ((ret = __db_appname(env, - DB_APP_DATA, old, &dbp->dirname, &real_old)) != 0) + appname, old, &dbp->dirname, &real_old)) != 0) goto err; if ((ret = __db_appname(env, - DB_APP_DATA, new, &dbp->dirname, &real_new)) != 0) + appname, new, &dbp->dirname, &real_new)) != 0) goto err; } @@ -1414,9 +1417,11 @@ __fop_inmem_read_meta(dbp, txn, name, flags, chkflags) if ((ret = __db_chk_meta(dbp->env, dbp, metap, chkflags)) == 0) memcpy(dbp->fileid, ((DBMETA *)metap)->uid, DB_FILE_ID_LEN); - } else + } else ret = __db_meta_setup( dbp->env, dbp, name, metap, flags, chkflags); + if (ret == DB_CHKSUM_FAIL) + ret = DB_META_CHKSUM_FAIL; if ((t_ret = __memp_fput(dbp->mpf, ip, metap, dbp->priority)) && ret == 0) @@ -1426,11 +1431,12 @@ __fop_inmem_read_meta(dbp, txn, name, flags, chkflags) } static int -__fop_ondisk_dummy(dbp, txn, name, mbuf) +__fop_ondisk_dummy(dbp, txn, name, mbuf, appname) DB *dbp; DB_TXN *txn; const char *name; u_int8_t *mbuf; + APPNAME appname; { ENV *env; int ret; @@ -1442,11 +1448,11 @@ __fop_ondisk_dummy(dbp, txn, name, mbuf) dflags = F_ISSET(dbp, DB_AM_NOT_DURABLE) ? 
DB_LOG_NOT_DURABLE : 0; if ((ret = __db_appname(env, - DB_APP_DATA, name, &dbp->dirname, &realname)) != 0) + appname, name, &dbp->dirname, &realname)) != 0) goto err; if ((ret = __fop_create(env, - txn, NULL, name, &dbp->dirname, DB_APP_DATA, 0, dflags)) != 0) + txn, NULL, name, &dbp->dirname, appname, 0, dflags)) != 0) goto err; if ((ret = @@ -1455,7 +1461,7 @@ __fop_ondisk_dummy(dbp, txn, name, mbuf) ((DBMETA *)mbuf)->magic = DB_RENAMEMAGIC; if ((ret = __fop_write(env, txn, name, dbp->dirname, - DB_APP_DATA, NULL, 0, 0, 0, mbuf, DBMETASIZE, 1, dflags)) != 0) + appname, NULL, 0, 0, 0, mbuf, DBMETASIZE, 1, dflags)) != 0) goto err; memcpy(dbp->fileid, ((DBMETA *)mbuf)->uid, DB_FILE_ID_LEN); @@ -1511,11 +1517,12 @@ err: return (ret); } static int -__fop_ondisk_swap(dbp, tmpdbp, txn, old, new, back, locker) +__fop_ondisk_swap(dbp, tmpdbp, txn, old, new, back, locker, appname) DB *dbp, *tmpdbp; DB_TXN *txn; const char *old, *new, *back; DB_LOCKER *locker; + APPNAME appname; { DBT fiddbt, namedbt, tmpdbt; DB_FH *fhp; @@ -1538,7 +1545,7 @@ __fop_ondisk_swap(dbp, tmpdbp, txn, old, new, back, locker) dflags = F_ISSET(dbp, DB_AM_NOT_DURABLE) ? DB_LOG_NOT_DURABLE : 0; if ((ret = __db_appname(env, - DB_APP_DATA, new, &dbp->dirname, &realnew)) != 0) + appname, new, &dbp->dirname, &realnew)) != 0) goto err; /* Now, lock the name space while we initialize this file. */ @@ -1634,10 +1641,10 @@ retry: GET_ENVLOCK(env, locker, &elock); * swap for the handle lock. 
*/ if ((ret = __fop_rename(env, txn, - old, new, &dbp->dirname, dbp->fileid, DB_APP_DATA, 1, dflags)) != 0) + old, new, &dbp->dirname, dbp->fileid, appname, 1, dflags)) != 0) goto err; if ((ret = __fop_rename(env, txn, back, old, - &dbp->dirname, tmpdbp->fileid, DB_APP_DATA, 0, dflags)) != 0) + &dbp->dirname, tmpdbp->fileid, appname, 0, dflags)) != 0) goto err; if ((ret = __fop_lock_handle(env, tmpdbp, locker, DB_LOCK_WRITE, &elock, NOWAIT_FLAG(txn))) != 0) @@ -1673,12 +1680,12 @@ retry: GET_ENVLOCK(env, locker, &elock); DB_INIT_DBT(namedbt, old, strlen(old) + 1); if ((t_ret = __fop_file_remove_log(env, parent, &lsn, dflags, &fiddbt, &tmpdbt, &namedbt, - (u_int32_t)DB_APP_DATA, child_txnid)) != 0 && ret == 0) + (u_int32_t)appname, child_txnid)) != 0 && ret == 0) ret = t_ret; /* This is a delayed delete of the dummy file. */ if ((ret = __db_appname(env, - DB_APP_DATA, old, &dbp->dirname, &realold)) != 0) + appname, old, &dbp->dirname, &realold)) != 0) goto err; if ((ret = __txn_remevent(env, parent, realold, NULL, 0)) != 0) |