/*-
 * See the file LICENSE for redistribution information.
 *
 * Copyright (c) 1997,2008 Oracle.  All rights reserved.
 *
 * $Id: zerofill.c,v 12.29 2008/03/12 19:13:07 mbrey Exp $
 */

#include "db_config.h"

#include "db_int.h"

/*
 * __db_zero_fill --
 *	Zero out bytes in the file.
 *
 *	Pages allocated by writing pages past end-of-file are not zeroed,
 *	on some systems.  Recovery could theoretically be fooled by a page
 *	showing up that contained garbage.  In order to avoid this, we
 *	have to write the pages out to disk, and flush them.  The reason
 *	for the flush is because if we don't sync, the allocation of another
 *	page subsequent to this one might reach the disk first, and if we
 *	crashed at the right moment, leave us with this page as the one
 *	allocated by writing a page past it in the file.
 *
 *	The target offset is computed from the file handle's own pgno,
 *	pgsize and offset fields.  If the file is already at least that
 *	large nothing is written.  Otherwise the gap between the current
 *	end of file and the target offset is filled with zero bytes, the
 *	file is synced, and the handle is repositioned at the target offset.
 *
 *	When HAVE_FILESYSTEM_NOTZERO is not defined this is a no-op.
 *
 *	Returns 0 on success, or the first non-zero error returned by one
 *	of the underlying __os_* calls.
 *
 * PUBLIC: int __db_zero_fill __P((ENV *, DB_FH *));
 */
int
__db_zero_fill(env, fhp)
	ENV *env;
	DB_FH *fhp;
{
#ifdef HAVE_FILESYSTEM_NOTZERO
	off_t stat_offset, write_offset;
	size_t blen, nw;
	u_int32_t bytes, mbytes;
	int group_sync, ret;
	u_int8_t *bp;

	/* Calculate the byte offset of the next write. */
	write_offset = (off_t)fhp->pgno * fhp->pgsize + fhp->offset;

	/* Stat the file. */
	if ((ret = __os_ioinfo(env, NULL, fhp, &mbytes, &bytes, NULL)) != 0)
		return (ret);
	stat_offset = (off_t)mbytes * MEGABYTE + bytes;

	/* Check if the file is large enough. */
	if (stat_offset >= write_offset)
		return (0);

	/* Get a large buffer if we're writing lots of data. */
#undef	ZF_LARGE_WRITE
#define	ZF_LARGE_WRITE	(64 * 1024)
	if ((ret = __os_calloc(env, 1, ZF_LARGE_WRITE, &bp)) != 0)
		return (ret);
	blen = ZF_LARGE_WRITE;

	/* Seek to the current end of the file. */
	if ((ret = __os_seek(env, fhp, mbytes, MEGABYTE, bytes)) != 0)
		goto err;

	/*
	 * Hash is the only access method that allocates groups of pages.  Hash
	 * uses the existence of the last page in a group to signify the entire
	 * group is OK; so, write all the pages but the last one in the group,
	 * flush them to disk, then write the last one to disk and flush it.
	 *
	 * group_sync is 0 on the first pass and 1 on every later pass, so the
	 * intermediate flush only happens when at least one earlier chunk has
	 * already been written.  The final chunk is the one that is trimmed to
	 * end exactly at write_offset.
	 */
	for (group_sync = 0; stat_offset < write_offset; group_sync = 1) {
		if (write_offset - stat_offset <= (off_t)blen) {
			blen = (size_t)(write_offset - stat_offset);
			if (group_sync && (ret = __os_fsync(env, fhp)) != 0)
				goto err;
		}
		/*
		 * NOTE(review): nw is not inspected; the offset is advanced by
		 * the requested length.  Presumably __os_physwrite reports a
		 * short write as an error -- confirm.
		 */
		if ((ret = __os_physwrite(env, fhp, bp, blen, &nw)) != 0)
			goto err;
		stat_offset += blen;
	}
	if ((ret = __os_fsync(env, fhp)) != 0)
		goto err;

	/* Seek back to where we started. */
	mbytes = (u_int32_t)(write_offset / MEGABYTE);
	bytes = (u_int32_t)(write_offset % MEGABYTE);
	ret = __os_seek(env, fhp, mbytes, MEGABYTE, bytes);

err:	__os_free(env, bp);
	return (ret);
#else
	COMPQUIET(env, NULL);
	COMPQUIET(fhp, NULL);
	return (0);
#endif /* HAVE_FILESYSTEM_NOTZERO */
}

/*
 * __db_zero_extend --
 *	Write zero-filled pages to the file, from page pgno through page
 *	last_pgno inclusive.
 *
 *	env:		environment handle, passed through to the __os_* calls
 *	fhp:		file handle to write to
 *	pgno:		first page number to write
 *	last_pgno:	last page number to write (inclusive); if it is less
 *			than pgno, nothing is written and 0 is returned
 *	pgsize:		page size in bytes; also the size of the single
 *			scratch buffer allocated for the writes
 *
 *	Returns 0 on success, or the first non-zero error returned by
 *	__os_calloc or __os_io.  The scratch buffer is always released.
 *
 * PUBLIC: int __db_zero_extend __P((ENV *,
 * PUBLIC:     DB_FH *, db_pgno_t, db_pgno_t, u_int32_t));
 */
int
__db_zero_extend(env, fhp, pgno, last_pgno, pgsize)
	ENV *env;
	DB_FH *fhp;
	db_pgno_t pgno, last_pgno;
	u_int32_t pgsize;
{
	int ret;
	size_t nwrote;
	u_int8_t *buf;

	/*
	 * The buffer is used as-is as the page of zeros; no extra memset is
	 * done.  __db_zero_fill relies on __os_calloc in the same way.
	 * NOTE(review): presumably __os_calloc has calloc semantics and
	 * returns zeroed memory -- confirm.
	 */
	if ((ret = __os_calloc(env, 1, pgsize, &buf)) != 0)
		return (ret);

	/*
	 * Stop at the first failing write; ret carries the error out.
	 *
	 * NOTE(review): nwrote is not inspected.  Presumably __os_io reports
	 * a short write as an error rather than returning 0 -- confirm.
	 */
	for (; pgno <= last_pgno; pgno++) {
		if ((ret = __os_io(env, DB_IO_WRITE,
		    fhp, pgno, pgsize, 0, pgsize, buf, &nwrote)) != 0)
			break;
	}

	__os_free(env, buf);
	return (ret);
}