InnoDB 是目前 MySQL 的主要存储引擎, 其 record 相关的细节繁琐, 这里仅做整理以便查阅. 版本基于 MySQL-8.0.25.
数据结构
InnoDB record 的逻辑格式: dtuple_t
/** Structure for an SQL data tuple of fields (logical record) */structdtuple_t{/* ... *//** Number of fields in dtuple */ulintn_fields;/* 当前 dtuple 记录的字段数量. *//** number of fields which should be used in comparison services of rem0cmp.*;
the index search is performed by comparing only these fields, others are
ignored; the default value in dtuple creation is the same value as n_fields */ulintn_fields_cmp;/* 当前 dtuple 中可以用来比较的字段数量, 可以通过
* dtuple_set_n_fields_cmp() 设置. *//** Fields. */dfield_t*fields;/* 当前 dtuple 的字段内容. *//** Structure for an SQL data field */structdfield_t{void*data;/*!< pointer to data */unsignedext:1;/*!< TRUE=externally stored, FALSE=local */unsignedspatial_status:2;/*!< spatial status of externally stored field
in undo log for purge */unsignedlen;/*!< data length; UNIV_SQL_NULL if SQL null 数据长度 */dtype_ttype;/*!< type of data 数据类型*//* ... */}*//** ... *//** Compare a data tuple to a physical record.
* dtuple_t 与 rec_t 的比较函数. */intcompare(constrec_t*rec,constdict_index_t*index,constulint*offsets,ulint*matched_fields)const;/** ... */};
MySQL SQL 层的 record 可以通过 row_sel_convert_mysql_key_to_innobase() 转换为 InnoDB 可识别的 dtuple_t 结构.
如果 pcur 指向一个 user record, 保存 user record, m_rel_pos 为 BTR_PCUR_ON.
restore_position() 先尝试乐观加锁, 即直接判断 m_modify_clock 是否变化; 假如 B+ tree 发生了 SMO, 则需要采用悲观加锁的方式, 即通过 btr_cur_search_to_nth_level() 重新 search 并加锁:
boolbtr_pcur_t::restore_position(ulintlatch_mode,mtr_t*mtr,constchar*file,ulintline){dtuple_t*tuple;page_cur_mode_tmode;ut_ad(mtr->is_active());ut_ad(m_old_stored);ut_ad(is_positioned());autoindex=btr_cur_get_index(get_btr_cur());/* ... */ut_a(m_old_rec!=nullptr);ut_a(m_old_n_fields>0);/* Optimistic latching involves S/X latch not required for
intrinsic table instead we would prefer to search fresh. */if((latch_mode==BTR_SEARCH_LEAF||latch_mode==BTR_MODIFY_LEAF||latch_mode==BTR_SEARCH_PREV||latch_mode==BTR_MODIFY_PREV)&&!m_btr_cur.index->table->is_intrinsic()){/* Try optimistic restoration. *//* 乐观恢复. */if(m_block_when_stored.run_with_hint([&](buf_block_t*hint){returnhint!=nullptr&&btr_cur_optimistic_latch_leaves(hint,m_modify_clock,&latch_mode,&m_btr_cur,file,line,mtr);})){m_pos_state=BTR_PCUR_IS_POSITIONED;m_latch_mode=latch_mode;buf_block_dbg_add_level(get_block(),dict_index_is_ibuf(index)?SYNC_IBUF_TREE_NODE:SYNC_TREE_NODE);if(m_rel_pos==BTR_PCUR_ON){#ifdef UNIV_DEBUG
/* ... */#endif /* UNIV_DEBUG */return(true);}/* This is the same record as stored,
may need to be adjusted for BTR_PCUR_BEFORE/AFTER,
depending on search mode and direction. */if(is_on_user_rec()){m_pos_state=BTR_PCUR_IS_POSITIONED_OPTIMISTIC;}return(false);}}/* If optimistic restoration did not succeed, open the cursor anew */autoheap=mem_heap_create(256);tuple=dict_index_build_data_tuple(index,m_old_rec,m_old_n_fields,heap);/* Save the old search mode of the cursor */autoold_mode=m_search_mode;/* 根据 store_position() 时记录的 m_rel_pos 采用不同的 search mode. */switch(m_rel_pos){caseBTR_PCUR_ON:mode=PAGE_CUR_LE;break;caseBTR_PCUR_AFTER:mode=PAGE_CUR_G;break;caseBTR_PCUR_BEFORE:mode=PAGE_CUR_L;break;default:ut_error;}/* 乐观恢复 pcur 失败,就要通过 btr_cur_search_to_nth_level 来重新定位 pcur. */open_no_init(index,tuple,mode,latch_mode,0,mtr,file,line);/* Restore the old search mode */m_search_mode=old_mode;ut_ad(m_rel_pos==BTR_PCUR_ON||m_rel_pos==BTR_PCUR_BEFORE||m_rel_pos==BTR_PCUR_AFTER);if(m_rel_pos==BTR_PCUR_ON&&is_on_user_rec()&&!cmp_dtuple_rec(tuple,get_rec(),index,rec_get_offsets(get_rec(),index,nullptr,ULINT_UNDEFINED,&heap))){/* We have to store the NEW value for the modify clock,
since the cursor can now be on a different page!
But we can retain the value of old_rec */autoblock=get_block();m_block_when_stored.store(block);m_modify_clock=buf_block_get_modify_clock(block);m_old_stored=true;mem_heap_free(heap);return(true);}mem_heap_free(heap);/* We have to store new position information, modify_clock etc.,
to the cursor because it can now be on a different page, the record
under it may have been removed, etc. */store_position(mtr);return(false);}
store_position() 会记录 buf_block_t 指针, 乐观恢复时会直接尝试对该 buf_block_t 加锁. 但当前的 Buffer Pool 支持动态 resize, 这部分内存可能会被释放, 所以 InnoDB 会首先判断这个 buf_block_t 指针是否仍存在于 Buffer Pool 的 chunk 中:
voidBlock_hint::buffer_fix_block_if_still_valid(){if(m_block!=nullptr){constbuf_pool_t*constpool=buf_pool_get(m_page_id);rw_lock_t*latch=buf_page_hash_lock_get(pool,m_page_id);rw_lock_s_lock(latch);/* If not own buf_pool_mutex, page_hash can be changed. */latch=buf_page_hash_lock_s_confirm(latch,pool,m_page_id);if(buf_is_block_in_instance(pool,m_block)&&m_page_id==m_block->page.id&&buf_block_get_state(m_block)==BUF_BLOCK_FILE_PAGE){buf_block_buf_fix_inc(m_block,__FILE__,__LINE__);}else{clear();}rw_lock_s_unlock(latch);}}