Skip to content

Commit

Permalink
page mover
Browse files Browse the repository at this point in the history
  • Loading branch information
riverszhang89 committed Aug 27, 2024
1 parent 53e570a commit f174403
Show file tree
Hide file tree
Showing 28 changed files with 3,177 additions and 20 deletions.
3 changes: 2 additions & 1 deletion bbinc/thrman.h
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,8 @@ enum thrtype {
THRTYPE_PGLOGS_ASOF = 23,
THRTYPE_WATCHDOG = 24,
THRTYPE_CLEANEXIT = 25,
THRTYPE_GENERIC = 26,
THRTYPE_PGMV = 26,
THRTYPE_GENERIC = 27,
THRTYPE_MAX
};

Expand Down
5 changes: 4 additions & 1 deletion bdb/bdb_api.h
Original file line number Diff line number Diff line change
Expand Up @@ -2467,6 +2467,9 @@ int release_locks_int(const char *trace, const char *func, int line, struct sqlc
#define release_locks(trace) release_locks_int(trace, __func__, __LINE__, NULL)

int bdb_keylen(bdb_state_type *bdb_state, int ixnum);

void llmeta_collect_tablename_alias(void);
/*
 * Page-mover entry points. Each one applies the corresponding per-file DB
 * method to the open files of the table under a single transaction, and
 * returns 0 on success or non-zero on failure.
 */

/* Rebuild the freelist of the table's data, blob and index files. */
int bdb_rebuild_freelist(bdb_state_type *bdb_state);
/* Run the pgswap method on the table's data, blob and index files. */
int bdb_pgswap(bdb_state_type *bdb_state);
/* Run the pgswap_overflow method on the table's blob files only; a no-op
 * returning 0 when gbl_pgmv_handle_overflow is off. */
int bdb_pgswap_overflow(bdb_state_type *bdb_state);
/* Run the evict_from_cache method on the table's data, blob and index files. */
int bdb_evict_from_cache(bdb_state_type *bdb_state);
#endif
103 changes: 103 additions & 0 deletions bdb/file.c
Original file line number Diff line number Diff line change
Expand Up @@ -9027,3 +9027,106 @@ int bdb_debug_log(bdb_state_type *bdb_state, tran_type *trans, int inop)
op.data = &endianized;
return bdb_state->dbenv->debug_log(bdb_state->dbenv, tid, &op, NULL, NULL);
}

/* Signature shared by the per-file page-mover methods invoked below. */
typedef int (*pgmv_rtn)(DB *, DB_TXN *);

/*
 * Apply a page-mover routine to the open files of a table inside one
 * low-priority transaction.
 *
 *   bdb_state     handle whose dbp_data[][] and dbp_ix[] entries are visited
 *   rtn           routine called as rtn(dbp, txn) on every non-NULL handle
 *   bdb_lock_str  identifier handed to BDB_READLOCK
 *   blobonly      non-zero: skip dta file 0 and skip all indexes
 *
 * Returns 0 when every routine call and the commit succeed. Returns -1
 * without touching any file when this node is not the master. Otherwise
 * returns the first non-zero rc from txn_begin_low_priority, rtn, or commit;
 * when rtn fails the transaction is aborted.
 */
static int call_berkdb_pgmv_rtn(bdb_state_type *bdb_state, pgmv_rtn rtn, const char *bdb_lock_str, int blobonly)
{
    int rc = -1;

    int dta, stripe;
    int ix;
    DB_ENV *dbenv;
    DB_TXN *txn;
    DB *dbp;

    BDB_READLOCK(bdb_lock_str);

    /* Only the master runs the page mover; rc stays -1 for everyone else. */
    if (bdb_state->repinfo->master_host != bdb_state->repinfo->myhost)
        goto out;

    dbenv = bdb_state->dbenv;
    rc = dbenv->txn_begin_low_priority(dbenv, NULL, &txn, 0);
    if (rc != 0)
        goto out;

    /* Process data and blob. The rc == 0 loop guards stop at the first failure. */
    for (dta = 0; rc == 0 && dta < MAXDTAFILES; ++dta) {
        if (blobonly && dta == 0)
            continue;
        for (stripe = 0; rc == 0 && stripe < MAXDTASTRIPE; ++stripe) {
            if ((dbp = bdb_state->dbp_data[dta][stripe]) != NULL) {
                rc = rtn(dbp, txn);
                if (rc != 0) {
                    logmsg(LOGMSG_ERROR, "pgmv failed rc %d\n", rc);
                }
            }
        }
    }

    /* Process indexes */
    for (ix = 0; !blobonly && rc == 0 && ix < MAXINDEX; ++ix) {
        if ((dbp = bdb_state->dbp_ix[ix]) != NULL) {
            rc = rtn(dbp, txn);
            if (rc != 0) {
                /* Report index failures the same way as data/blob failures,
                 * so an aborted run always leaves a trace in the log. */
                logmsg(LOGMSG_ERROR, "pgmv failed rc %d\n", rc);
            }
        }
    }

    if (rc == 0)
        rc = txn->commit(txn, 0);
    else
        txn->abort(txn);

out:
    BDB_RELLOCK();
    return rc;
}

/* check pages even if they are still referenced in the log */
extern int gbl_pgmv_unsafe_db_resize;

/*
 * Rebuild the freelist of the table's data, blob and index files by running
 * the DB rebuild_freelist method on each of them.
 *
 * When gbl_pgmv_unsafe_db_resize is set, the bufferpool is flushed before the
 * rebuild and again after a successful rebuild.
 *
 * Returns 0 on success. Returns non-zero if a flush or the rebuild fails. A
 * flush that reports failure only through bdberr (rc == 0) is returned as -1,
 * so the caller never sees success for an operation that did not complete.
 */
int bdb_rebuild_freelist(bdb_state_type *bdb_state)
{
    int rc = 0, bdberr = BDBERR_NOERROR;
    if (gbl_pgmv_unsafe_db_resize) {
        logmsg(LOGMSG_WARN, "%s: unsafe_db_resize is enabled! full-recovery may not work!\n", __func__);
        logmsg(LOGMSG_WARN, "%s: flushing bufferpool!\n", __func__);
        rc = bdb_flush(bdb_state, &bdberr);
        if (rc != 0 || bdberr != BDBERR_NOERROR) {
            logmsg(LOGMSG_WARN, "%s: bdb_flush failed rc %d bdberr %d\n", __func__, rc, bdberr);
            /* Do not report success when only bdberr signals the failure. */
            return (rc != 0) ? rc : -1;
        }
    }

    /* The method pointer is read from the data file 0 / stripe 0 handle. */
    pgmv_rtn rtn = bdb_state->dbp_data[0][0]->rebuild_freelist;
    rc = call_berkdb_pgmv_rtn(bdb_state, rtn, __func__, 0);

    if (rc == 0 && gbl_pgmv_unsafe_db_resize) {
        logmsg(LOGMSG_WARN, "%s: unsafe_db_resize is enabled! flush again to push recovery point further\n", __func__);
        rc = bdb_flush(bdb_state, &bdberr);
        if (rc != 0 || bdberr != BDBERR_NOERROR) {
            logmsg(LOGMSG_WARN, "%s: bdb_flush failed rc %d bdberr %d\n", __func__, rc, bdberr);
            /* Same rule as above: a bdberr-only failure must not return 0. */
            return (rc != 0) ? rc : -1;
        }
    }

    return rc;
}

/*
 * Run the DB pgswap method on the table's data, blob and index files.
 * The method pointer is read from the data file 0 / stripe 0 handle.
 * Returns whatever call_berkdb_pgmv_rtn returns (0 on success).
 */
int bdb_pgswap(bdb_state_type *bdb_state)
{
    DB *first_dta = bdb_state->dbp_data[0][0];

    return call_berkdb_pgmv_rtn(bdb_state, first_dta->pgswap, __func__, 0);
}

extern int gbl_pgmv_handle_overflow;

/*
 * Run the DB pgswap_overflow method in blob-only mode (dta file 0 and all
 * indexes are skipped). Does nothing and returns 0 when
 * gbl_pgmv_handle_overflow is off; otherwise returns whatever
 * call_berkdb_pgmv_rtn returns.
 */
int bdb_pgswap_overflow(bdb_state_type *bdb_state)
{
    int rc = 0;

    if (gbl_pgmv_handle_overflow) {
        DB *first_dta = bdb_state->dbp_data[0][0];
        rc = call_berkdb_pgmv_rtn(bdb_state, first_dta->pgswap_overflow, __func__, 1);
    }
    return rc;
}

/*
 * Run the DB evict_from_cache method on the table's data, blob and index
 * files. The method pointer is read from the data file 0 / stripe 0 handle.
 * Returns whatever call_berkdb_pgmv_rtn returns (0 on success).
 */
int bdb_evict_from_cache(bdb_state_type *bdb_state)
{
    DB *first_dta = bdb_state->dbp_data[0][0];

    return call_berkdb_pgmv_rtn(bdb_state, first_dta->evict_from_cache, __func__, 0);
}
20 changes: 20 additions & 0 deletions berkdb/build/db.h
Original file line number Diff line number Diff line change
Expand Up @@ -751,6 +751,7 @@ struct __db_log_stat {
#define DB_MPOOL_DIRTY 0x002 /* Page is modified. */
#define DB_MPOOL_DISCARD 0x004 /* Don't cache the page. */
#define DB_MPOOL_PFPUT 0x008 /* page got by prefault */
#define DB_MPOOL_EVICT 0x010 /* Evict the page, now! */

/* Flag values for DB_MPOOLFILE->alloc. */
#define DB_MPOOL_LOWPRI 0x001 /* Evict low-priority pages. */
Expand Down Expand Up @@ -1803,6 +1804,24 @@ struct __db {
int (*cursor_nocount) __P((DB *, DB_TXN *, DBC **, u_int32_t));
int (*get_numpages) __P((DB *, db_pgno_t *));

/*
* Rebuilds the freelist in the page-order. Additionally ftruncates the file
* if there're continuous free pages at the end of the file.
*/
int (*rebuild_freelist) __P((DB *, DB_TXN *));
/*
* Scan the file backwards, and swap pages with lower-numbered free pages.
*/
int (*pgswap) __P((DB *, DB_TXN *));
/*
* swap overflow pages with lower-numbered free pages
*/
int (*pgswap_overflow) __P((DB *, DB_TXN *));
/*
* Evict all pages in this file from the bufferpool
*/
int (*evict_from_cache) __P((DB *, DB_TXN *));

/*
* Never called; these are a place to save function pointers
* so that we can undo an associate.
Expand Down Expand Up @@ -2692,6 +2711,7 @@ struct __db_env {

int (*txn_begin_with_prop)
__P((DB_ENV *, DB_TXN *, DB_TXN **, u_int32_t, struct txn_properties*));
int (*txn_begin_low_priority) __P((DB_ENV *, DB_TXN *, DB_TXN **, u_int32_t));

int (*set_num_recovery_processor_threads)
__P((DB_ENV *env, int nthreads));
Expand Down
101 changes: 101 additions & 0 deletions berkdb/db/db.src
Original file line number Diff line number Diff line change
Expand Up @@ -260,3 +260,104 @@ ARG pgno db_pgno_t lu
ARG vec_len u_int32_t lu
ARG ptype u_int32_t lu
END

/*
 * rebuild_freelist --
 * Rebuilds the freelist in page order
 * meta_lsn: the meta-data page's original LSN
 * meta_pgno: the meta-data page number
 * last_pgno: the meta-data last page number
 * end_pgno: freelist points to this page number if all pages in `fl' can be truncated
 * fl: list of free pages to be rebuilt
 * fllsn: LSNs of pages in `fl'
 */
BEGIN rebuild_freelist 70
DB fileid int32_t ld
POINTER meta_lsn DB_LSN * lu
ARG meta_pgno db_pgno_t lu
ARG last_pgno db_pgno_t lu
ARG end_pgno db_pgno_t lu
DBT fl DBT s
DBT fllsn DBT s
END

/*
 * pg_swap --
 * Swaps a page with a lower-numbered free page
 * pgno: page to be replaced
 * lsn: LSN of the page to be replaced
 * hdr: header on the page
 * data: data on the page
 * next_pgno: pgno of next page
 * next_pglsn: page LSN of next page
 * prev_pgno: pgno of previous page
 * prev_pglsn: page LSN of previous page
 * parent_pgno: pgno of parent page
 * parent_pglsn: page LSN of parent page
 * pref_indx: page reference in parent page
 * new_pgno: pgno of new page that replaces the old page
 * new_pglsn: page LSN of new page
 */
BEGIN pg_swap 71
DB fileid int32_t ld
ARG pgno db_pgno_t lu
POINTER lsn DB_LSN * lu
DBT hdr DBT s
DBT data DBT s
ARG next_pgno db_pgno_t lu
POINTER next_pglsn DB_LSN * lu
ARG prev_pgno db_pgno_t lu
POINTER prev_pglsn DB_LSN * lu
ARG parent_pgno db_pgno_t lu
POINTER parent_pglsn DB_LSN * lu
ARG pref_indx db_indx_t lu
ARG new_pgno db_pgno_t lu
POINTER new_pglsn DB_LSN * lu
END

/*
 * resize --
 * Resizes a file
 * meta_lsn: meta page LSN
 * meta_pgno: meta page number
 * oldlast: original last page number
 * newlast: new last page number
 */
BEGIN resize 72
DB fileid int32_t ld
POINTER meta_lsn DB_LSN * lu
ARG meta_pgno db_pgno_t lu
ARG oldlast db_pgno_t lu
ARG newlast db_pgno_t lu
END

/*
 * pg_swap_overflow --
 * Swaps an overflow page with a lower-numbered free page
 * pgno: page to be replaced
 * lsn: LSN of the page to be replaced
 * data: the page itself
 * next_pgno: pgno of next page
 * next_pglsn: page LSN of next page
 * prev_pgno: pgno of previous page
 * prev_pglsn: page LSN of previous page
 * main_pgno: pgno of the main page
 * main_pglsn: page LSN of the main page
 * main_indx: overflow record on the main page
 * new_pgno: pgno of new page that replaces the old page
 * new_pglsn: page LSN of new page
 */
BEGIN pg_swap_overflow 73
DB fileid int32_t ld
ARG pgno db_pgno_t lu
POINTER lsn DB_LSN * lu
DBT data DBT s
ARG next_pgno db_pgno_t lu
POINTER next_pglsn DB_LSN * lu
ARG prev_pgno db_pgno_t lu
POINTER prev_pglsn DB_LSN * lu
ARG main_pgno db_pgno_t lu
POINTER main_pglsn DB_LSN * lu
ARG main_indx db_indx_t lu
ARG new_pgno db_pgno_t lu
POINTER new_pglsn DB_LSN * lu
END
Loading

0 comments on commit f174403

Please sign in to comment.