天天看點

greenplum 正常鎖管理器-1

1正常鎖/事務鎖管理概述

  在Greenplum中,根據不同的場景和目的定義了三類鎖,分别是自旋鎖(Spinlocks)、輕量級鎖(LWLocks)和正常鎖(Regular locks,也叫重量級鎖)。

  自旋鎖是一種短期持有的鎖。如果加鎖之後需要執行的程式指令很多,或者涉及到系統調用,則不适合使用自旋鎖。自旋鎖通常由硬體指令TAS來實作,等待鎖的程序會忙等直到拿到鎖;如果等待時間過長,則會觸發逾時機制。自旋鎖既沒有死鎖檢測機制,也不會在出錯時自動釋放。

  輕量級鎖為共享記憶體中需要并發通路的結構體提供鎖保護,支援兩種模式:互斥模式和共享模式。輕量級鎖并沒有死鎖檢測機制,但是在elog出錯恢複時,輕量級鎖管理器會自動釋放持有的輕量級鎖,是以一個程序在持有輕量級鎖時是可以安全地報錯的。通常來說,在沒有鎖競争的情況下,擷取和釋放一個輕量級鎖都是很快的。當一個程序必須等待一個輕量級鎖時,會阻塞在一個SysV信号量上,是以等待過程并不消耗CPU時間。等待程序按照申請鎖的先後順序獲得授權,沒有逾時機制。

  正常鎖(Regular locks)也叫做重量級鎖,用于對資料庫對象(比如表、資料記錄等)加鎖。正常鎖支援多種不同的加鎖模式,同時也支援死鎖檢測,并且會在事務結束時自動釋放。

接下來,我們将重點講述正常鎖的實作細節。

2 關鍵資料結構

鎖方法是對鎖行為的整體描述。在目前的Greenplum資料庫中,有三種鎖方法:DEFAULT、USER和RESOURCE。

/* Identifiers of the lock methods known to the lock manager. */
#define DEFAULT_LOCKMETHOD  1	/* system default method, for common data objects */
#define USER_LOCKMETHOD     2	/* mainly used for advisory locks */
#define RESOURCE_LOCKMETHOD 3	/* locks access to resource queues */
           

其中,DEFAULT鎖方法是系統預設的加鎖方法,用于對常見資料對象加鎖。USER鎖方法主要用于諮詢鎖(Advisory Locks)。RESOURCE鎖方法用于對資源隊列的通路加鎖。描述鎖方法的結構體如下:

/*
 * Descriptor of one lock method.
 *
 * NOTE(review): the per-field notes below are inferred from the field names
 * and types only; confirm them against the lock manager implementation.
 */
typedef struct LockMethodData
{
	int			numLockModes;	/* presumably the number of lock modes of this method */
	const LOCKMASK *conflictTab;	/* read-only array of lock-mode bitmasks;
									 * presumably one conflict mask per mode */
	const char *const *lockModeNames;	/* read-only array of strings;
										 * presumably one name per mode */
	const bool *trace_flag;		/* NOTE(review): looks like a pointer to a
								 * tracing switch -- confirm */
} LockMethodData;

/* A lock method is handled as a pointer to its read-only descriptor. */
typedef const LockMethodData *LockMethod;
           

LOCKMODE 鎖模式

/*
 * Valid LOCKMODE values for every standard lock method, i.e. for both
 * DEFAULT and USER.
 */

/* Flag value meaning "don't get a lock"; NoLock is not a real lock mode */
#define NoLock 0

/* SELECT */
#define AccessShareLock 1

/* SELECT FOR UPDATE/FOR SHARE */
#define RowShareLock 2

/* INSERT, UPDATE, DELETE */
#define RowExclusiveLock 3

/* VACUUM (non-FULL), ANALYZE, CREATE INDEX CONCURRENTLY */
#define ShareUpdateExclusiveLock 4

/* CREATE INDEX (WITHOUT CONCURRENTLY) */
#define ShareLock 5

/* like EXCLUSIVE MODE, but allows ROW SHARE */
#define ShareRowExclusiveLock 6

/* blocks ROW SHARE/SELECT...FOR UPDATE */
#define ExclusiveLock 7

/* ALTER TABLE, DROP TABLE, VACUUM FULL, and unqualified LOCK TABLE */
#define AccessExclusiveLock 8

/* Largest lock mode number defined above */
#define MaxLockMode 8
           

在記憶體中正常鎖以LOCK結構體表示:

/*
 * Per-locked-object lock information:
 *
 * tag -- uniquely identifies the object being locked
 * grantMask -- bitmask for all lock types currently granted on this object.
 * waitMask -- bitmask for all lock types currently awaited on this object.
 * procLocks -- list of PROCLOCK objects for this lock.
 * waitProcs -- queue of processes waiting for this lock.
 * requested -- count of each lock type currently requested on the lock
 *		(includes requests already granted!!).
 * nRequested -- total requested locks of all types.
 * granted -- count of each lock type currently granted on the lock.
 * nGranted -- total granted locks of all types.
 * holdTillEndXact -- if the lock is releasable before the end of the transaction.
 *
 * Note: these counts count 1 for each backend.  Internally to a backend,
 * there may be multiple grabs on a particular lock, but this is not reflected
 * into shared memory.
 */
typedef struct LOCK
{
	/* hash key */
	LOCKTAG		tag;			/* unique identifier of lockable object */

	/* data */
	LOCKMASK	grantMask;		/* bitmask for lock types already granted */
	LOCKMASK	waitMask;		/* bitmask for lock types awaited */
	SHM_QUEUE	procLocks;		/* list of PROCLOCK objects assoc. with lock */
	PROC_QUEUE	waitProcs;		/* list of PGPROC objects waiting on lock */
	int			requested[MAX_LOCKMODES];	/* counts of requested locks, one
											 * counter per lock mode */
	int			nRequested;		/* total of requested[] array */
	int			granted[MAX_LOCKMODES]; /* counts of granted locks, one
										 * counter per lock mode */
	int			nGranted;		/* total of granted[] array */
	bool		holdTillEndXact;     /* flag for global deadlock detector */
} LOCK;
           

為加快鎖查找,采用哈希表的方式存放鎖對象,其中哈希鍵為LOCKTAG,結構體資訊如下:

/*
 * The LOCKTAG struct is defined with malice aforethought to fit into 16
 * bytes with no padding.  Note that this would need adjustment if we were
 * to widen Oid, BlockNumber, or TransactionId to more than 32 bits.
 *
 * The 16 bytes are made up of three 32-bit fields, one 16-bit field and two
 * 8-bit fields (3 * 4 + 2 + 1 + 1).
 *
 * We include lockmethodid in the locktag so that a single hash table in
 * shared memory can store locks of different lockmethods.
 */
typedef struct LOCKTAG
{
	uint32		locktag_field1; /* a 32-bit ID field */
	uint32		locktag_field2; /* a 32-bit ID field */
	uint32		locktag_field3; /* a 32-bit ID field */
	uint16		locktag_field4; /* a 16-bit ID field */
	uint8		locktag_type;	/* see enum LockTagType */
	uint8		locktag_lockmethodid;	/* lockmethod indicator */
} LOCKTAG;
           

每個鎖都有相應的持有者,其以PROCLOCK結構體表示:

/*
 * We may have several different backends holding or awaiting locks
 * on the same lockable object.  We need to store some per-holder/waiter
 * information for each such holder (or would-be holder).  This is kept in
 * a PROCLOCK struct.
 *
 * PROCLOCKTAG is the key information needed to look up a PROCLOCK item in the
 * proclock hashtable.  A PROCLOCKTAG value uniquely identifies the combination
 * of a lockable object and a holder/waiter for that object.  (We can use
 * pointers here because the PROCLOCKTAG need only be unique for the lifespan
 * of the PROCLOCK, and it will never outlive the lock or the proc.)
 *
 * Internally to a backend, it is possible for the same lock to be held
 * for different purposes: the backend tracks transaction locks separately
 * from session locks.  However, this is not reflected in the shared-memory
 * state: we only track which backend(s) hold the lock.  This is OK since a
 * backend can never block itself.
 *
 * The holdMask field shows the already-granted locks represented by this
 * proclock.  Note that there will be a proclock object, possibly with
 * zero holdMask, for any lock that the process is currently waiting on.
 * Otherwise, proclock objects whose holdMasks are zero are recycled
 * as soon as convenient.
 *
 * releaseMask is workspace for LockReleaseAll(): it shows the locks due
 * to be released during the current call.  This must only be examined or
 * set by the backend owning the PROCLOCK.
 *
 * Each PROCLOCK object is linked into lists for both the associated LOCK
 * object and the owning PGPROC object.  Note that the PROCLOCK is entered
 * into these lists as soon as it is created, even if no lock has yet been
 * granted.  A PGPROC that is waiting for a lock to be granted will also be
 * linked into the lock's waitProcs queue.
 */
/*
 * Lookup key of a PROCLOCK: the pair of a lockable object and the backend
 * that holds or awaits it.
 */
typedef struct PROCLOCKTAG
{
	/* NB: we assume this struct contains no padding! */
	LOCK	   *myLock;			/* link to per-lockable-object information */
	PGPROC	   *myProc;			/* link to PGPROC of owning backend */
} PROCLOCKTAG;

/*
 * Per-holder/waiter information for one lockable object, identified by its
 * PROCLOCKTAG.
 */
typedef struct PROCLOCK
{
	/* tag */
	PROCLOCKTAG tag;			/* unique identifier of proclock object */

	/* data */
	PGPROC	   *groupLeader;	/* proc's lock group leader, or proc itself */
	LOCKMASK	holdMask;		/* bitmask for lock types currently held */
	LOCKMASK	releaseMask;	/* bitmask for lock types to be released */
	SHM_QUEUE	lockLink;		/* list link in LOCK's list of proclocks */
	SHM_QUEUE	procLink;		/* list link in PGPROC's list of proclocks */
	int			nLocks;			/* total number of times lock is held by
								   this process, used by resource scheduler */
	SHM_QUEUE	portalLinks;	/* list of ResPortalIncrements for this
								   proclock, used by resource scheduler */
} PROCLOCK;
           

代表backend程序的結構體PGPROC、代表鎖持有者或者等待者的結構體PROCLOCK,以及代表鎖對象的結構體LOCK之間的關系如下圖所示:

greenplum 正常鎖管理器-1

針對每個backend,greenplum在其本地建立鎖哈希表用于緩存已申請的鎖對象,加速後續申請/釋放鎖,其結構體為LOCALLOCK:

/*
 * Each backend also maintains a local hash table with information about each
 * lock it is currently interested in.  In particular the local table counts
 * the number of times that lock has been acquired.  This allows multiple
 * requests for the same lock to be executed without additional accesses to
 * shared memory.  We also track the number of lock acquisitions per
 * ResourceOwner, so that we can release just those locks belonging to a
 * particular ResourceOwner.
 *
 * When holding a lock taken "normally", the lock and proclock fields always
 * point to the associated objects in shared memory.  However, if we acquired
 * the lock via the fast-path mechanism, the lock and proclock fields are set
 * to NULL, since there probably aren't any such objects in shared memory.
 * (If the lock later gets promoted to normal representation, we may eventually
 * update our locallock's lock/proclock fields after finding the shared
 * objects.)
 *
 * Caution: a locallock object can be left over from a failed lock acquisition
 * attempt.  In this case its lock/proclock fields are untrustworthy, since
 * the shared lock object is neither held nor awaited, and hence is available
 * to be reclaimed.  If nLocks > 0 then these pointers must either be valid or
 * NULL, but when nLocks == 0 they should be considered garbage.
 */
/*
 * Key of a backend-local lock table entry: the locked object together with
 * the lock mode.
 */
typedef struct LOCALLOCKTAG
{
	LOCKTAG		lock;			/* identifies the lockable object */
	LOCKMODE	mode;			/* lock mode for this table entry */
} LOCALLOCKTAG;

/* Number of times one owner holds a given local lock. */
typedef struct LOCALLOCKOWNER
{
	/*
	 * Note: if owner is NULL then the lock is held on behalf of the session;
	 * otherwise it is held on behalf of my current transaction.
	 *
	 * Must use a forward struct reference to avoid circularity.
	 */
	struct ResourceOwnerData *owner;	/* resource owner tracking the lock */
	int64		nLocks;			/* # of times held by this owner */
} LOCALLOCKOWNER;

/*
 * One entry of the backend-local lock table, keyed by LOCALLOCKTAG.
 */
typedef struct LOCALLOCK
{
	/* tag */
	LOCALLOCKTAG tag;			/* unique identifier of locallock entry */

	/* data */
	uint32		hashcode;		/* copy of LOCKTAG's hash value */
	LOCK	   *lock;			/* associated LOCK object, if any */
	PROCLOCK   *proclock;		/* associated PROCLOCK object, if any */
	int64		nLocks;			/* total number of times lock is held */
	int			numLockOwners;	/* # of relevant ResourceOwners */
	int			maxLockOwners;	/* allocated size of array */
	LOCALLOCKOWNER *lockOwners; /* dynamically resizable array of per-owner
								 * hold counts */
	bool		holdsStrongLockCount;	/* bumped FastPathStrongRelationLocks */
	bool		lockCleared;	/* we read all sinval msgs for lock */
	bool		istemptable;	/* MPP: During prepare we set this if the lock is on a temp table, to avoid MPP-1094 */
} LOCALLOCK;
           

鎖模式沖突

鎖沖突矩陣

/*
 * Semantics of the standard lock methods, expressed as a conflict table.
 *
 * Each entry is the OR of the LOCKBIT_ON() bits of the lock modes that
 * conflict with the mode named in the comment above the entry.
 */
static const LOCKMASK LockConflicts[] = {
	/* first slot: empty mask */
	0,

	/* AccessShareLock */
	LOCKBIT_ON(AccessExclusiveLock),

	/* RowShareLock */
	LOCKBIT_ON(ExclusiveLock)
	| LOCKBIT_ON(AccessExclusiveLock),

	/* RowExclusiveLock */
	LOCKBIT_ON(ShareLock)
	| LOCKBIT_ON(ShareRowExclusiveLock)
	| LOCKBIT_ON(ExclusiveLock)
	| LOCKBIT_ON(AccessExclusiveLock),

	/* ShareUpdateExclusiveLock */
	LOCKBIT_ON(ShareUpdateExclusiveLock)
	| LOCKBIT_ON(ShareLock)
	| LOCKBIT_ON(ShareRowExclusiveLock)
	| LOCKBIT_ON(ExclusiveLock)
	| LOCKBIT_ON(AccessExclusiveLock),

	/* ShareLock */
	LOCKBIT_ON(RowExclusiveLock)
	| LOCKBIT_ON(ShareUpdateExclusiveLock)
	| LOCKBIT_ON(ShareRowExclusiveLock)
	| LOCKBIT_ON(ExclusiveLock)
	| LOCKBIT_ON(AccessExclusiveLock),

	/* ShareRowExclusiveLock */
	LOCKBIT_ON(RowExclusiveLock)
	| LOCKBIT_ON(ShareUpdateExclusiveLock)
	| LOCKBIT_ON(ShareLock)
	| LOCKBIT_ON(ShareRowExclusiveLock)
	| LOCKBIT_ON(ExclusiveLock)
	| LOCKBIT_ON(AccessExclusiveLock),

	/* ExclusiveLock */
	LOCKBIT_ON(RowShareLock)
	| LOCKBIT_ON(RowExclusiveLock)
	| LOCKBIT_ON(ShareUpdateExclusiveLock)
	| LOCKBIT_ON(ShareLock)
	| LOCKBIT_ON(ShareRowExclusiveLock)
	| LOCKBIT_ON(ExclusiveLock)
	| LOCKBIT_ON(AccessExclusiveLock),

	/* AccessExclusiveLock */
	LOCKBIT_ON(AccessShareLock)
	| LOCKBIT_ON(RowShareLock)
	| LOCKBIT_ON(RowExclusiveLock)
	| LOCKBIT_ON(ShareUpdateExclusiveLock)
	| LOCKBIT_ON(ShareLock)
	| LOCKBIT_ON(ShareRowExclusiveLock)
	| LOCKBIT_ON(ExclusiveLock)
	| LOCKBIT_ON(AccessExclusiveLock)

};
           

對應的沖突視圖:

greenplum 正常鎖管理器-1

每種鎖模式與特定的SQL語句關系圖:

greenplum 正常鎖管理器-1