问题描述
Seata global_table 存在超时事务
使用:
select
*
from
global_table
where
status = 5
查询到超时全局事务:xid: 10.142.138.107:8091:8773548850834589350
查询对应库中的 undo_log 表内容:
select
*
from
undo_log
where
xid = '10.142.138.107:8091:8773548850834589350';
branch_id: 8773548850834589358
context: serializer=jackson&compressorType=NONE
rollback_info:
{
"@class": "io.seata.rm.datasource.undo.BranchUndoLog",
"xid": "10.142.138.107:8091:8773548850834589350",
"branchId": 8773548850834589358,
"sqlUndoLogs": [
"java.util.ArrayList",
[
{
"@class": "io.seata.rm.datasource.undo.SQLUndoLog",
"sqlType": "UPDATE",
"tableName": "zf_payment_apply",
"beforeImage": {
"@class": "io.seata.rm.datasource.sql.struct.TableRecords",
"tableName": "zf_payment_apply",
"rows": [
"java.util.ArrayList",
[
{
"@class": "io.seata.rm.datasource.sql.struct.Row",
"fields": [
"java.util.ArrayList",
[
{
"@class": "io.seata.rm.datasource.sql.struct.Field",
"name": "id",
"keyType": "PRIMARY_KEY",
"type": -5,
"value": [
"java.lang.Long",
1807580842336395264
]
},
{
"@class": "io.seata.rm.datasource.sql.struct.Field",
"name": "update_time",
"keyType": "NULL",
"type": 93,
"value": [
"java.sql.Timestamp",
1719822160000
]
},
{
"@class": "io.seata.rm.datasource.sql.struct.Field",
"name": "update_user",
"keyType": "NULL",
"type": 12,
"value": "黄大连"
},
{
"@class": "io.seata.rm.datasource.sql.struct.Field",
"name": "update_user_id",
"keyType": "NULL",
"type": -5,
"value": [
"java.lang.Long",
1630477221617123328
]
},
{
"@class": "io.seata.rm.datasource.sql.struct.Field",
"name": "impl_content_amount_available",
"keyType": "NULL",
"type": 3,
"value": [
"java.math.BigDecimal",
8305039.12
]
},
{
"@class": "io.seata.rm.datasource.sql.struct.Field",
"name": "amount_available",
"keyType": "NULL",
"type": 3,
"value": [
"java.math.BigDecimal",
8305039.12
]
}
]
]
},
{
"@class": "io.seata.rm.datasource.sql.struct.Row",
"fields": [
"java.util.ArrayList",
[
{
"@class": "io.seata.rm.datasource.sql.struct.Field",
"name": "id",
"keyType": "PRIMARY_KEY",
"type": -5,
"value": [
"java.lang.Long",
1807614305345150976
]
},
{
"@class": "io.seata.rm.datasource.sql.struct.Field",
"name": "update_time",
"keyType": "NULL",
"type": 93,
"value": [
"java.sql.Timestamp",
1719822160000
]
},
{
"@class": "io.seata.rm.datasource.sql.struct.Field",
"name": "update_user",
"keyType": "NULL",
"type": 12,
"value": "黄大连"
},
{
"@class": "io.seata.rm.datasource.sql.struct.Field",
"name": "update_user_id",
"keyType": "NULL",
"type": -5,
"value": [
"java.lang.Long",
1630477221617123328
]
},
{
"@class": "io.seata.rm.datasource.sql.struct.Field",
"name": "impl_content_amount_available",
"keyType": "NULL",
"type": 3,
"value": [
"java.math.BigDecimal",
8305039.12
]
},
{
"@class": "io.seata.rm.datasource.sql.struct.Field",
"name": "amount_available",
"keyType": "NULL",
"type": 3,
"value": [
"java.math.BigDecimal",
8305039.12
]
}
]
]
}
]
]
},
"afterImage": {
"@class": "io.seata.rm.datasource.sql.struct.TableRecords",
"tableName": "zf_payment_apply",
"rows": [
"java.util.ArrayList",
[
{
"@class": "io.seata.rm.datasource.sql.struct.Row",
"fields": [
"java.util.ArrayList",
[
{
"@class": "io.seata.rm.datasource.sql.struct.Field",
"name": "id",
"keyType": "PRIMARY_KEY",
"type": -5,
"value": [
"java.lang.Long",
1807580842336395264
]
},
{
"@class": "io.seata.rm.datasource.sql.struct.Field",
"name": "update_time",
"keyType": "NULL",
"type": 93,
"value": [
"java.sql.Timestamp",
1719880683000
]
},
{
"@class": "io.seata.rm.datasource.sql.struct.Field",
"name": "update_user",
"keyType": "NULL",
"type": 12,
"value": "王炳超"
},
{
"@class": "io.seata.rm.datasource.sql.struct.Field",
"name": "update_user_id",
"keyType": "NULL",
"type": -5,
"value": [
"java.lang.Long",
1630477210032455680
]
},
{
"@class": "io.seata.rm.datasource.sql.struct.Field",
"name": "impl_content_amount_available",
"keyType": "NULL",
"type": 3,
"value": [
"java.math.BigDecimal",
7305039.12
]
},
{
"@class": "io.seata.rm.datasource.sql.struct.Field",
"name": "amount_available",
"keyType": "NULL",
"type": 3,
"value": [
"java.math.BigDecimal",
7305039.12
]
}
]
]
},
{
"@class": "io.seata.rm.datasource.sql.struct.Row",
"fields": [
"java.util.ArrayList",
[
{
"@class": "io.seata.rm.datasource.sql.struct.Field",
"name": "id",
"keyType": "PRIMARY_KEY",
"type": -5,
"value": [
"java.lang.Long",
1807614305345150976
]
},
{
"@class": "io.seata.rm.datasource.sql.struct.Field",
"name": "update_time",
"keyType": "NULL",
"type": 93,
"value": [
"java.sql.Timestamp",
1719880683000
]
},
{
"@class": "io.seata.rm.datasource.sql.struct.Field",
"name": "update_user",
"keyType": "NULL",
"type": 12,
"value": "王炳超"
},
{
"@class": "io.seata.rm.datasource.sql.struct.Field",
"name": "update_user_id",
"keyType": "NULL",
"type": -5,
"value": [
"java.lang.Long",
1630477210032455680
]
},
{
"@class": "io.seata.rm.datasource.sql.struct.Field",
"name": "impl_content_amount_available",
"keyType": "NULL",
"type": 3,
"value": [
"java.math.BigDecimal",
7305039.12
]
},
{
"@class": "io.seata.rm.datasource.sql.struct.Field",
"name": "amount_available",
"keyType": "NULL",
"type": 3,
"value": [
"java.math.BigDecimal",
7305039.12
]
}
]
]
}
]
]
}
}
]
]
}
branch_id: 8773548850834589360
context: serializer=jackson&compressorType=NONE
rollback_info:
{
"@class": "io.seata.rm.datasource.undo.BranchUndoLog",
"xid": "10.142.138.107:8091:8773548850834589350",
"branchId": 8773548850834589360,
"sqlUndoLogs": [
"java.util.ArrayList",
[
{
"@class": "io.seata.rm.datasource.undo.SQLUndoLog",
"sqlType": "UPDATE",
"tableName": "zf_payment_apply",
"beforeImage": {
"@class": "io.seata.rm.datasource.sql.struct.TableRecords",
"tableName": "zf_payment_apply",
"rows": [
"java.util.ArrayList",
[
{
"@class": "io.seata.rm.datasource.sql.struct.Row",
"fields": [
"java.util.ArrayList",
[
{
"@class": "io.seata.rm.datasource.sql.struct.Field",
"name": "id",
"keyType": "PRIMARY_KEY",
"type": -5,
"value": [
"java.lang.Long",
1807614305345150976
]
},
{
"@class": "io.seata.rm.datasource.sql.struct.Field",
"name": "update_time",
"keyType": "NULL",
"type": 93,
"value": [
"java.sql.Timestamp",
1719880683000
]
},
{
"@class": "io.seata.rm.datasource.sql.struct.Field",
"name": "update_user",
"keyType": "NULL",
"type": 12,
"value": "王炳超"
},
{
"@class": "io.seata.rm.datasource.sql.struct.Field",
"name": "update_user_id",
"keyType": "NULL",
"type": -5,
"value": [
"java.lang.Long",
1630477210032455680
]
},
{
"@class": "io.seata.rm.datasource.sql.struct.Field",
"name": "impl_content_amount_available",
"keyType": "NULL",
"type": 3,
"value": [
"java.math.BigDecimal",
7305039.12
]
},
{
"@class": "io.seata.rm.datasource.sql.struct.Field",
"name": "amount_available",
"keyType": "NULL",
"type": 3,
"value": [
"java.math.BigDecimal",
7305039.12
]
}
]
]
}
]
]
},
"afterImage": {
"@class": "io.seata.rm.datasource.sql.struct.TableRecords",
"tableName": "zf_payment_apply",
"rows": [
"java.util.ArrayList",
[
{
"@class": "io.seata.rm.datasource.sql.struct.Row",
"fields": [
"java.util.ArrayList",
[
{
"@class": "io.seata.rm.datasource.sql.struct.Field",
"name": "id",
"keyType": "PRIMARY_KEY",
"type": -5,
"value": [
"java.lang.Long",
1807614305345150976
]
},
{
"@class": "io.seata.rm.datasource.sql.struct.Field",
"name": "update_time",
"keyType": "NULL",
"type": 93,
"value": [
"java.sql.Timestamp",
1719880683000
]
},
{
"@class": "io.seata.rm.datasource.sql.struct.Field",
"name": "update_user",
"keyType": "NULL",
"type": 12,
"value": "王炳超"
},
{
"@class": "io.seata.rm.datasource.sql.struct.Field",
"name": "update_user_id",
"keyType": "NULL",
"type": -5,
"value": [
"java.lang.Long",
1630477210032455680
]
},
{
"@class": "io.seata.rm.datasource.sql.struct.Field",
"name": "impl_content_amount_available",
"keyType": "NULL",
"type": 3,
"value": [
"java.math.BigDecimal",
8305039.12
]
},
{
"@class": "io.seata.rm.datasource.sql.struct.Field",
"name": "amount_available",
"keyType": "NULL",
"type": 3,
"value": [
"java.math.BigDecimal",
8305039.12
]
}
]
]
}
]
]
}
}
]
]
}
上述 undo_log 记录了全局事务中的数据变更,可以切分 beforeImage 和 afterImage,并使用 MobaDiff 进行对:
- undo_log branch_id: 8773548850834589358
- 条目1 - id: 1807580842336395264
- update_time: 1719880683000 -> 1719880683000
- update_user: 黄大连 -> 王炳超
- update_user_id: 1630477221617123328 -> 1630477210032455680
- impl_content_amount_available: 8305039.12 -> 7305039.12
- amount_available: 8305039.12 -> 7305039.12
- 条目2 - id: 1807614305345150976
- update_time: 1719822160000 -> 1719880683000
- update_user: 黄大连 -> 王炳超
- update_user_id: 1630477221617123328 -> 1630477210032455680
- impl_content_amount_available: 8305039.12 -> 7305039.12
- amount_available: 8305039.12 -> 7305039.12
- 条目1 - id: 1807580842336395264
- undo_log branch_id: 8773548850834589360
- 条目1 - id: 1807614305345150976
- impl_content_amount_available: 7305039.12 -> 8305039.12
- amount_available: 7305039.12 -> 8305039.12
- 条目1 - id: 1807614305345150976
此时查询数据库中数据状态:
- 条目1 - id: 1807580842336395264
- impl_content_amount_available: 7305039.12
- amount_available: 7305039.12
- 条目2 – id: 1807614305345150976
- impl_content_amount_available: 8305039.12
- amount_available: 8305039.12
对应服务中打印日志:
branchRollback failed. branchType:[AT], xid:[10.x.x.107:8091:8773548850834589350], branchId:[8773548850834589358], resourceId:[jdbc:mysql://10.x.x.87:3306/fc_paycenter], applicationData:[null]. reason:[Branch session rollback failed and try again later xid = 10.x.x.107:8091:8773548850834589350 branchId = 8773548850834589358 Has dirty records when undo.
问题分析
Seata 工作流程回顾
先来回顾一下 Seata 的工作流程。系统选用 Seata 的 AT 模式,采取两阶段提交(2PC)协议。
在第一阶段,即预提交阶段,将会对数据资源进行锁定,并进行数据库操作,后将操作记录至 undo_log 中。此时的数据变更不会真正提交到数据库中,而是依赖数据库多版本并发控制(MVCC)进行暂存。也就是即使预提交均完成,但在正式提交之前,使用 SQL 对数据库中的数据进行查询(不在同一事务内),查询到的依然是预提交之前的结果。
在第二阶段(提交/回滚阶段),如果一阶段所有分支事务都预提交成功,事务协调器(TC)会向所有分支发送提交请求。每个分支接收到提交请求后,会正式提交之前预提交阶段在数据库中的修改操作,此时数据变更才真正对其他事务可见。同时,分支会清除之前生成的 undo_log
记录,因为数据已提交,无需回滚。如果在任意阶段(包括一阶段结束到二阶段开始之间的时间窗口)发现问题,比如网络故障、某个分支提交失败等,事务协调器会向所有分支发送回滚请求。每个分支使用之前记录的 undo_log
来撤销之前的操作,恢复数据到预提交前的状态,确保数据一致性。
Seata 隔离级别
我们知道 MySQL 的默认隔离级别是 可重复读。 Seata 的默认隔离级别是 读未提交,但是由于 Seata 会对全局事务的数据资源进行管理控制(AT 模式下不是单纯对数据进行锁定),通过全局锁和乐观锁机制确保数据操作的串行化(在准备提交阶段检查数据是否被其他事务修改,发现冲突则进行回滚和重试)。两个全局事务并发对同一数据进行更新时,在任一事务完全提交之前,另一事务是不会存在基于未提交数据进行数据更新的情况。
Seata 全局事务状态表
状态 | 代码 | 备注 |
全局事务开始(Begin) | 1 | 此状态可以接受新的分支事务注册 |
全局事务提交中(Committing) | 2 | 这个状态会随时改变 |
全局事务提交重试(CommitRetry) | 3 | 在提交异常被解决后尝试重试提交 |
全局事务回滚中(Rollbacking) | 4 | 正在重新回滚全局事务 |
全局事务回滚重试(RollbackRetrying) | 5 | 在全局回滚异常被解决后尝试事务重试回滚中 |
全局事务超时回滚中(TimeoutRollbacking) | 6 | 全局事务超时回滚中 |
全局事务超时回滚重试中(TimeoutRollbackRetrying) | 7 | 全局事务超时回滚重试中 |
异步提交中(AsyncCommitting) | 8 | 异步提交中 |
二阶段已提交(Committed) | 9 | 二阶段已提交,此状态后全局事务状态不会再改变 |
二阶段提交失败(CommitFailed) | 10 | 二阶段提交失败 |
二阶段决议全局回滚(Rollbacked) | 11 | 二阶段决议全局回滚 |
二阶段全局回滚失败(RollbackFailed) | 12 | 二阶段全局回滚失败 |
二阶段超时回滚(TimeoutRollbacked) | 13 | 二阶段超时回滚 |
二阶段超时回滚失败(TimeoutRollbackFailed) | 14 | 二阶段超时回滚失败 |
全局事务结束(Finished) | 15 | 全局事务结束 |
二阶段提交超时(CommitRetryTimeout) | 16 | 二阶段提交因超过重试时间限制导致失败 |
二阶段回滚超时(RollbackRetryTimeout) | 17 | 二阶段回滚因超过重试时间限制导致失败 |
位置状态(UnKnown) | 0 | 位置状态 |
Seata 分支事务状态表
状态 | 代码 | 备注 |
分支事务注册(Registered) | 1 | 向TC注册分支事务 |
分支事务一阶段完成(PhaseOne_Done) | 2 | 分支事务一阶段业务逻辑完成 |
分支事务一阶段失败(PhaseOne_Failed) | 3 | 分支事务一阶段业务逻辑失败 |
分支事务一阶段超时(PhaseOne_Timeout) | 4 | 分支事务一阶段处理超时 |
分支事务二阶段已提交(PhaseTwo_Committed) | 5 | 分支事务二阶段提交 |
分支事务二阶段提交失败重试(PhaseTwo_CommitFailed_Retryable) | 6 | 分支事务二阶段提交失败重试 |
分支事务二阶段提交失败不重试(PhaseTwo_CommitFailed_Unretryable) | 7 | 分支事务二阶段提交失败不重试 |
分支事务二阶段已回滚(PhaseTwo_Rollbacked) | 8 | 分支事务二阶段已回滚 |
分支事务二阶段回滚失败重试(PhaseTwo_RollbackFailed_Retryable) | 9 | 分支事务二阶段回滚失败重试 |
分支事务二阶段回滚失败不重试(PhaseTwo_RollbackFailed_Unretryable) | 10 | 二阶段提交失败 |
未知状态(UnKnown) | 0 | 未知状态 |
原因猜测
undo_log 中的变更数据与数据库中最终数据一致,而服务打印 xid = 10.x.x.107:8091:8773548850834589350 branchId = 8773548850834589358 Has dirty records when undo.意味着该事务第二阶段仍未完成,处于回滚重试的状态,且 lock_table 中的锁信息也并未完成释放。
猜测可能:
一、数据出现脏写情况。
二、资源管理器(RM)已经完成了对事务的二阶段提交操作(无论是提交还是回滚),但在向事务协调者(TC)报告这一状态的过程中出现了异常,导致 TC 没有接收到确认信息。在这种情况下,TC 可能因为超时或未收到所有参与者的响应,认为事务执行出现了问题,进而尝试回滚事务,因此 global_table 中的 status 被置为 5。由于 RM 已经提交事务,数据库中的数据已经变更,此时基于之前生成的 undo_log 进行回滚可能无法正确还原数据到事务开始前的状态。由于 undo_log 是在 RM 明确接收到响应后才会进行清理,也就是说如果 RM 完成了二阶段提交,但与 TC 的通信过程中出现异常,undo_log 可能会暂时保留。
于是我去查询了 RM 在该全局事务中打印的日志,以下日志顺序执行:
Begin new global transaction [10.x.x.107:8091:8773548850834589350
== > 此处远程调用抛出异常 < ==
handle branch rollback process:xid=10.x.x.107:8091:8773548850834589350,branchId=8773548850834589358,branchType=AT,resourceId=jdbc:mysql://10.x.x.87:3306/fc_paycenter,applicationData=null
Branch Rollbacking: 10.x.x.107:8091:8773548850834589350 8773548850834589358 jdbc:mysql://10.x.x.87:3306/fc_paycenter
Field not equals, name amount_available, old value 7305039.12, new value 8305039.12
branchRollback failed. branchType:[AT], xid:[10.x.x.107:8091:8773548850834589350], branchId:[8773548850834589358], resourceId:[jdbc:mysql://10.x.x.87:3306/fc_paycenter], applicationData:[null]. reason:[Branch session rollback failed and try again later xid = 10.x.x.107:8091:8773548850834589350 branchId = 8773548850834589358 Has dirty records when undo.
Suspending current transaction, xid = 10.x.x.107:8091:8773548850834589350
[10.x.x.107:8091:8773548850834589350] rollback status: RollbackRetrying
Retrying to rollback transaction[10.x.x.107:8091:8773548850834589350]
handle branch rollback process:xid=10.x.x.107:8091:8773548850834589350,branchId=8773548850834589352,branchType=AT,resourceId=jdbc:mysql://10.x.x.86:3306/fc_project,applicationData=null
Branch Rollbacking: 10.x.x.107:8091:8773548850834589350 8773548850834589352 jdbc:mysql://10.x.x.86:3306/fc_project
xid 10.x.x.107:8091:8773548850834589350 branch 8773548850834589352, undo_log deleted with GlobalFinished
... 后面进入 fc_paycenter 分支下的 rollback 循环
根据上述日志,可以排除情况二的假设,可能是出现了脏写情况,导致分支无限尝试回滚,在后续版本的 Seata 中(1.5.x 之后),改变了该处理机制,抛出一个 SQLUndoDirtyException 而不再进行重试,并打印如下日志:
Branch session rollback failed because of dirty undo log, please delete the relevant undolog after manually calibrating the data.
原因确定
最后原因确定是 Seata Server 版本启动错误,与应用依赖版本不匹配。