Oracle 19C 依旧支持bbed

Posted on 2019 年 02 月 28 日 by 惜分飞

Oracle 19c

SQL> select BANNER from v$version;

BANNER
--------------------------------------------------------------------------------
Oracle Database 19c Enterprise Edition Release 19.0.0.0.0 - Production

SQL> select name from v$datafile;

NAME
--------------------------------------------------------------------------------
/u01/app/oracle/oradata/ORA19C/system01.dbf
/u01/app/oracle/oradata/ORA19C/sysaux01.dbf
/u01/app/oracle/oradata/ORA19C/undotbs01.dbf
/u01/app/oracle/oradata/ORA19C/pdbseed/system01.dbf
/u01/app/oracle/oradata/ORA19C/pdbseed/sysaux01.dbf
/u01/app/oracle/oradata/ORA19C/users01.dbf
/u01/app/oracle/oradata/ORA19C/pdbseed/undotbs01.dbf
/u01/app/oracle/oradata/ORA19C/pdb/system01.dbf
/u01/app/oracle/oradata/ORA19C/pdb/sysaux01.dbf
/u01/app/oracle/oradata/ORA19C/pdb/undotbs01.dbf
/u01/app/oracle/oradata/ORA19C/pdb/users01.dbf

bbed查看文件头

[oracle@localhost db_1]$ bbed
Password: 

BBED: Release 2.0.0.0.0 - Limited Production on Thu Feb 28 15:39:02 2019

Copyright (c) 1982, 2019, Oracle and/or its affiliates.  All rights reserved.

************* !!! For Oracle Internal Use only !!! ***************

BBED> 
BBED> 
BBED> set filename '/u01/app/oracle/oradata/ORA19C/system01.dbf'
        FILENAME        /u01/app/oracle/oradata/ORA19C/system01.dbf

BBED> set blocksize 8192
        BLOCKSIZE       8192

BBED> map
 File: /u01/app/oracle/oradata/ORA19C/system01.dbf (0)
 Block: 1                                     Dba:0x00000000
------------------------------------------------------------
 Data File Header

 struct kcvfh, 1272 bytes                   @0       

 ub4 tailchk                                @8188    


BBED> p kcvfh
struct kcvfh, 1272 bytes                    @0       
   struct kcvfhbfh, 20 bytes                @0       
      ub1 type_kcbh                         @0        0x0b
      ub1 frmt_kcbh                         @1        0xa2
      ub2 wrp2_kcbh                         @2        0x0000
      ub4 rdba_kcbh                         @4        0x00400001
      ub4 bas_kcbh                          @8        0x00000000
      ub2 wrp_kcbh                          @12       0x0000
      ub1 seq_kcbh                          @14       0x01
      ub1 flg_kcbh                          @15       0x04 (KCBHFCKV)
      ub2 chkval_kcbh                       @16       0x6896
      ub2 spare3_kcbh                       @18       0x0000
   struct kcvfhhdr, 76 bytes                @20      
      ub4 kccfhswv                          @20       0x00000000
      ub4 kccfhcvn                          @24       0x13000000
      ub4 kccfhdbi                          @28       0x3edc1c96
      text kccfhdbn[0]                      @32      O
      text kccfhdbn[1]                      @33      R
      text kccfhdbn[2]                      @34      A
      text kccfhdbn[3]                      @35      1
      text kccfhdbn[4]                      @36      9
      text kccfhdbn[5]                      @37      C
      text kccfhdbn[6]                      @38       
      text kccfhdbn[7]                      @39       
      ub4 kccfhcsq                          @40       0x0000040f
      ub4 kccfhfsz                          @44       0x0001bd00
      s_blkz kccfhbsz                       @48       0x00
      ub2 kccfhfno                          @52       0x0001
      ub2 kccfhtyp                          @54       0x0003
      ub4 kccfhacid                         @56       0x00000000
      ub4 kccfhcks                          @60       0x00000000
      text kccfhtag[0]                      @64       
      text kccfhtag[1]                      @65       
      text kccfhtag[2]                      @66       
      text kccfhtag[3]                      @67       
      text kccfhtag[4]                      @68       
      text kccfhtag[5]                      @69       
      text kccfhtag[6]                      @70       
      text kccfhtag[7]                      @71       
      text kccfhtag[8]                      @72       
      text kccfhtag[9]                      @73       
      text kccfhtag[10]                     @74       
      text kccfhtag[11]                     @75       
      text kccfhtag[12]                     @76       
      text kccfhtag[13]                     @77       
      text kccfhtag[14]                     @78       
      text kccfhtag[15]                     @79       
      text kccfhtag[16]                     @80       
      text kccfhtag[17]                     @81       
      text kccfhtag[18]                     @82       
      text kccfhtag[19]                     @83       
      text kccfhtag[20]                     @84       
      text kccfhtag[21]                     @85       
      text kccfhtag[22]                     @86       
      text kccfhtag[23]                     @87       
      text kccfhtag[24]                     @88       
      text kccfhtag[25]                     @89       
      text kccfhtag[26]                     @90       
      text kccfhtag[27]                     @91       
      text kccfhtag[28]                     @92       
      text kccfhtag[29]                     @93       
      text kccfhtag[30]                     @94       
      text kccfhtag[31]                     @95       
   ub4 kcvfhrdb                             @96       0x00400208
   struct kcvfhcrs, 8 bytes                 @100     
      ub4 kscnbas                           @100      0x00000008
      ub2 kscnwrp                           @104      0x8000
      ub2 kscnwrp2                          @106      0x0000
   ub4 kcvfhcrt                             @108      0x3b90f0c2
   ub4 kcvfhrlc                             @112      0x3bb086d8
   struct kcvfhrls, 8 bytes                 @116     
      ub4 kscnbas                           @116      0x001cb1dd
      ub2 kscnwrp                           @120      0x8000
      ub2 kscnwrp2                          @122      0x0000
   ub4 kcvfhbti                             @124      0x00000000
   struct kcvfhbsc, 8 bytes                 @128     
      ub4 kscnbas                           @128      0x00000000
      ub2 kscnwrp                           @132      0x0000
      ub2 kscnwrp2                          @134      0x0000
   ub2 kcvfhbth                             @136      0x0000
   ub2 kcvfhsta                             @138      0x2004 (KCVFHOFZ)
   struct kcvfhckp, 36 bytes                @484     
      struct kcvcpscn, 8 bytes              @484     
         ub4 kscnbas                        @484      0x001d44f9
         ub2 kscnwrp                        @488      0x8000
         ub2 kscnwrp2                       @490      0x0000
      ub4 kcvcptim                          @492      0x3bb08892
      ub2 kcvcpthr                          @496      0x0001
      union u, 12 bytes                     @500     
         struct kcvcprba, 12 bytes          @500     
            ub4 kcrbaseq                    @500      0x00000002
            ub4 kcrbabno                    @504      0x000143d1
            ub2 kcrbabof                    @508      0x0010
      ub1 kcvcpetb[0]                       @512      0x02
      ub1 kcvcpetb[1]                       @513      0x00
      ub1 kcvcpetb[2]                       @514      0x00
      ub1 kcvcpetb[3]                       @515      0x00
      ub1 kcvcpetb[4]                       @516      0x00
      ub1 kcvcpetb[5]                       @517      0x00
      ub1 kcvcpetb[6]                       @518      0x00
      ub1 kcvcpetb[7]                       @519      0x00
   ub4 kcvfhcpc                             @140      0x00000032
   ub4 kcvfhrts                             @144      0x3bb08686
   ub4 kcvfhccc                             @148      0x00000031
   struct kcvfhbcp, 36 bytes                @152     
      struct kcvcpscn, 8 bytes              @152     
         ub4 kscnbas                        @152      0x00000000
         ub2 kscnwrp                        @156      0x0000
         ub2 kscnwrp2                       @158      0x0000
      ub4 kcvcptim                          @160      0x00000000
      ub2 kcvcpthr                          @164      0x0000
      union u, 12 bytes                     @168     
         struct kcvcprba, 12 bytes          @168     
            ub4 kcrbaseq                    @168      0x00000000
            ub4 kcrbabno                    @172      0x00000000
            ub2 kcrbabof                    @176      0x0000
      ub1 kcvcpetb[0]                       @180      0x00
      ub1 kcvcpetb[1]                       @181      0x00
      ub1 kcvcpetb[2]                       @182      0x00
      ub1 kcvcpetb[3]                       @183      0x00
      ub1 kcvcpetb[4]                       @184      0x00
      ub1 kcvcpetb[5]                       @185      0x00
      ub1 kcvcpetb[6]                       @186      0x00
      ub1 kcvcpetb[7]                       @187      0x00
   ub4 kcvfhbhz                             @312      0x00000000
   struct kcvfhxcd, 16 bytes                @316     
      ub4 space_kcvmxcd[0]                  @316      0x00000000
      ub4 space_kcvmxcd[1]                  @320      0x00000000
      ub4 space_kcvmxcd[2]                  @324      0x00000000
      ub4 space_kcvmxcd[3]                  @328      0x00000000
   sword kcvfhtsn                           @332      0
   ub2 kcvfhtln                             @336      0x0006
   text kcvfhtnm[0]                         @338     S
   text kcvfhtnm[1]                         @339     Y
   text kcvfhtnm[2]                         @340     S
   text kcvfhtnm[3]                         @341     T
   text kcvfhtnm[4]                         @342     E
   text kcvfhtnm[5]                         @343     M
   text kcvfhtnm[6]                         @344      
   text kcvfhtnm[7]                         @345      
   text kcvfhtnm[8]                         @346      
   text kcvfhtnm[9]                         @347      
   text kcvfhtnm[10]                        @348      
   text kcvfhtnm[11]                        @349      
   text kcvfhtnm[12]                        @350      
   text kcvfhtnm[13]                        @351      
   text kcvfhtnm[14]                        @352      
   text kcvfhtnm[15]                        @353      
   text kcvfhtnm[16]                        @354      
   text kcvfhtnm[17]                        @355      
   text kcvfhtnm[18]                        @356      
   text kcvfhtnm[19]                        @357      
   text kcvfhtnm[20]                        @358      
   text kcvfhtnm[21]                        @359      
   text kcvfhtnm[22]                        @360      
   text kcvfhtnm[23]                        @361      
   text kcvfhtnm[24]                        @362      
   text kcvfhtnm[25]                        @363      
   text kcvfhtnm[26]                        @364      
   text kcvfhtnm[27]                        @365      
   text kcvfhtnm[28]                        @366      
   text kcvfhtnm[29]                        @367      
   ub4 kcvfhrfn                             @368      0x00000001
   struct kcvfhrfs, 8 bytes                 @372     
      ub4 kscnbas                           @372      0x00000000
      ub2 kscnwrp                           @376      0x0000
      ub2 kscnwrp2                          @378      0x0000
   ub4 kcvfhrft                             @380      0x00000000
   struct kcvfhafs, 8 bytes                 @384     
      ub4 kscnbas                           @384      0x00000000
      ub2 kscnwrp                           @388      0x0000
      ub2 kscnwrp2                          @390      0x0000
   ub4 kcvfhbbc                             @392      0x00000000
   ub4 kcvfhncb                             @396      0x00000000
   ub4 kcvfhmcb                             @400      0x00000000
   ub4 kcvfhlcb                             @404      0x00000000
   ub4 kcvfhbcs                             @408      0x00000000
   ub2 kcvfhofb                             @412      0x000a
   ub2 kcvfhnfb                             @414      0x000a
   ub4 kcvfhprc                             @416      0x3b90f0b9
   struct kcvfhprs, 8 bytes                 @420     
      ub4 kscnbas                           @420      0x00000001
      ub2 kscnwrp                           @424      0x0000
      ub2 kscnwrp2                          @426      0x0000
   struct kcvfhprfs, 8 bytes                @428     
      ub4 kscnbas                           @428      0x00000000
      ub2 kscnwrp                           @432      0x0000
      ub2 kscnwrp2                          @434      0x0000
   ub4 kcvfhtrt                             @444      0x00000000

由于新版本对scn的存储结构做了调整(除kscnbas、kscnwrp之外还有kscnwrp2),能够支持更大的scn值,因此在19C的bbed输出中可以明显看到kscnwrp2字段

ORA-600 kcbzib_kcrsds_1报错

Posted on 2019 年 02 月 24 日 by 惜分飞

联系：手机/微信(+86 17813235971) QQ(107644445)

标题：ORA-600 kcbzib_kcrsds_1报错

数据库版本

客户存储故障,修复之后,多套库在增加_allow_resetlogs_corruption隐含参数强制拉库时,都出现了类似的ORA-600 kcbzib_kcrsds_1错误。出现这个错误,一般都是由于数据库不一致时强制拉库导致的

2019-02-23T01:25:43.125621+08:00
 alter database open resetlogs
2019-02-23T01:25:43.231990+08:00
RESETLOGS is being done without consistancy checks. This may result
in a corrupted database. The database should be recreated.
RESETLOGS after incomplete recovery UNTIL CHANGE 149251865354 time 
Clearing online redo logfile 1 +DG_XFF/xifenfei/ONLINELOG/group_1.258.983824407
Clearing online redo logfile 2 +DG_XFF/xifenfei/ONLINELOG/group_2.259.983824409
Clearing online redo logfile 3 +DG_XFF/xifenfei/ONLINELOG/group_3.266.983825461
Clearing online redo logfile 4 +DG_XFF/xifenfei/ONLINELOG/group_4.267.983825461
Clearing online log 1 of thread 1 sequence number 20749
Clearing online log 2 of thread 1 sequence number 20750
Clearing online log 3 of thread 2 sequence number 1371
Clearing online log 4 of thread 2 sequence number 1372
2019-02-23T01:25:44.669890+08:00
ALTER SYSTEM SET remote_listener=' xifenfeidb-cluster-scan:1521' SCOPE=MEMORY SID='xifenfei2';
2019-02-23T01:25:44.671436+08:00
ALTER SYSTEM SET listener_networks='' SCOPE=MEMORY SID='xifenfei2';
2019-02-23T01:25:46.990077+08:00
Clearing online redo logfile 1 complete
Clearing online redo logfile 2 complete
Clearing online redo logfile 3 complete
Clearing online redo logfile 4 complete
Resetting resetlogs activation ID 3002369299 (0xb2f48513)
Online log +DG_XFF/xifenfei/ONLINELOG/group_1.258.983824407: Thread 1 Group 1 was previously cleared
Online log +DG_XFF/xifenfei/ONLINELOG/group_2.259.983824409: Thread 1 Group 2 was previously cleared
Online log +DG_XFF/xifenfei/ONLINELOG/group_3.266.983825461: Thread 2 Group 3 was previously cleared
Online log +DG_XFF/xifenfei/ONLINELOG/group_4.267.983825461: Thread 2 Group 4 was previously cleared
2019-02-23T01:25:47.137701+08:00
Setting recovery target incarnation to 2
2019-02-23T01:25:47.152393+08:00
This instance was first to open
Ping without log force is disabled:
  not an Exadata system.
Picked broadcast on commit scheme to generate SCNs
Endian type of dictionary set to little
2019-02-23T01:25:47.597502+08:00
Assigning activation ID 3019587675 (0xb3fb405b)
2019-02-23T01:25:47.625734+08:00
TT00: Gap Manager starting (PID:22467)
2019-02-23T01:25:47.910026+08:00
Thread 2 opened at log sequence 1
  Current log# 3 seq# 1 mem# 0: +DG_XFF/xifenfei/ONLINELOG/group_3.266.983825461
Successful open of redo thread 2
2019-02-23T01:25:47.911069+08:00
MTTR advisory is disabled because FAST_START_MTTR_TARGET is not set
2019-02-23T01:25:47.971709+08:00
Sleep 5 seconds and then try to clear SRLs in 2 time(s)
2019-02-23T01:25:48.065008+08:00
start recovery: pdb 0, passed in flags x10 (domain enable 0) 
2019-02-23T01:25:48.065177+08:00
Instance recovery: looking for dead threads
Instance recovery: lock domain invalid but no dead threads
validate pdb 0, flags x10, valid 0, pdb flags x84 
* validated domain 0, flags = 0x80
Instance recovery complete: valid 1 (flags x10, recovery domain flags x80) 
2019-02-23T01:25:48.803746+08:00
Errors in file /oracle/base/oracle/diag/rdbms/xifenfei/xifenfei2/trace/xifenfei2_ora_21292.trc  (incident=128552):
ORA-00600: internal error code, arguments: [kcbzib_kcrsds_1], [], [], [], [], [], [], [], [], [], [], []
Use ADRCI or Support Workbench to package the incident.
See Note 411.1 at My Oracle Support for error and packaging details.
2019-02-23T01:25:49.947062+08:00
*****************************************************************
An internal routine has requested a dump of selected redo.
This usually happens following a specific internal error, when
analysis of the redo logs will help Oracle Support with the
diagnosis.
It is recommended that you retain all the redo logs generated (by
all the instances) during the past 12 hours, in case additional
redo dumps are required to help with the diagnosis.
*****************************************************************
2019-02-23T01:25:50.334684+08:00
Errors in file /oracle/base/oracle/diag/rdbms/xifenfei/xifenfei2/trace/xifenfei2_ora_21292.trc:
ORA-00600: internal error code, arguments: [kcbzib_kcrsds_1], [], [], [], [], [], [], [], [], [], [], []
2019-02-23T01:25:50.334880+08:00
Errors in file /oracle/base/oracle/diag/rdbms/xifenfei/xifenfei2/trace/xifenfei2_ora_21292.trc:
ORA-00600: internal error code, arguments: [kcbzib_kcrsds_1], [], [], [], [], [], [], [], [], [], [], []
Error 600 happened during db open, shutting down database
2019-02-23T01:25:50.362808+08:00
Errors in file /oracle/base/oracle/diag/rdbms/xifenfei/xifenfei2/trace/xifenfei2_ora_21292.trc  (incident=128553):
ORA-00603: ORACLE server session terminated by fatal error
ORA-01092: ORACLE instance terminated. Disconnection forced
ORA-00600: internal error code, arguments: [kcbzib_kcrsds_1], [], [], [], [], [], [], [], [], [], [], []
2019-02-23T01:25:51.521133+08:00
opiodr aborting process unknown ospid (21292) as a result of ORA-603

另外,出现该错误之后,再次对数据库执行恢复会出现类似下面的报错(Media Recovery failed with error 16433),这是由于open库的过程中控制文件被损坏而导致的.

2019-02-23T22:52:39.390966+08:00
ALTER DATABASE RECOVER  database  
2019-02-23T22:52:39.391125+08:00
Media Recovery Start
 Started logmerger process
2019-02-23T22:52:39.471904+08:00
Media Recovery failed with error 16433
2019-02-23T22:52:39.996235+08:00
Errors in file /oracle/base/oracle/diag/rdbms/xifenfei/xifenfei1/trace/xifenfei1_m000_1593.trc:
ORA-01110: data file 11: '+DG_XFF/xifenfei/DATAFILE/ls_zh.274.984235699'
2019-02-23T22:52:40.224440+08:00
Errors in file /oracle/base/oracle/diag/rdbms/xifenfei/xifenfei1/trace/xifenfei1_m000_1593.trc:
ORA-01110: data file 12: '+DG_XFF/xifenfei/DATAFILE/sf_zh.275.984235715'
2019-02-23T22:52:40.459606+08:00
Errors in file /oracle/base/oracle/diag/rdbms/xifenfei/xifenfei1/trace/xifenfei1_m000_1593.trc:
ORA-01110: data file 13: '+DG_XFF/xifenfei/DATAFILE/tj_gl.276.984235729'
2019-02-23T22:52:40.574563+08:00
Recovery Slave PR00 previously exited with exception 283
ORA-283 signalled during: ALTER DATABASE RECOVER  database  ...

Input/output error故障恢复

Posted on 2019 年 02 月 24 日 by 惜分飞

联系：手机/微信(+86 17813235971) QQ(107644445)

标题：Input/output error故障恢复

客户由于硬件故障，导致数据文件出现io错误

oracle@linux1:~> dd if=/oradata/orcl/system01.dbf of=/oradata/orcl/system01.dbf_bak bs=8192
dd: reading `/oradata/orcl/system01.dbf': Input/output error
83871+0 records in
83871+0 records out
687071232 bytes (687 MB) copied, 1.07177 s, 641 MB/s
dd: reading `/oradata/orcl/system01.dbf': Input/output error
83871+0 records in
83871+0 records out
687071232 bytes (687 MB) copied, 1.0731 s, 640 MB/s
dd: reading `/oradata/orcl/system01.dbf': Input/output error
83871+0 records in
83871+0 records out
687071232 bytes (687 MB) copied, 1.07431 s, 640 MB/s
dd: reading `/oradata/orcl/system01.dbf': Input/output error
85158+1 records in
85158+1 records out
697618432 bytes (698 MB) copied, 4.11649 s, 169 MB/s
dd: reading `/oradata/orcl/system01.dbf': Input/output error
85158+1 records in
85158+1 records out
697618432 bytes (698 MB) copied, 5.64775 s, 124 MB/s
dd: reading `/oradata/orcl/system01.dbf': Input/output error
85158+1 records in
85158+1 records out
697618432 bytes (698 MB) copied, 7.1791 s, 97.2 MB/s
dd: reading `/oradata/orcl/system01.dbf': Input/output error
85158+1 records in
85158+1 records out
697618432 bytes (698 MB) copied, 8.70247 s, 80.2 MB/s
dd: reading `/oradata/orcl/system01.dbf': Input/output error
85158+1 records in
85158+1 records out
697618432 bytes (698 MB) copied, 10.2258 s, 68.2 MB/s
dd: reading `/oradata/orcl/system01.dbf': Input/output error
85158+1 records in
85158+1 records out
697618432 bytes (698 MB) copied, 10.2272 s, 68.2 MB/s
dd: reading `/oradata/orcl/system01.dbf': Input/output error
85158+1 records in
85158+1 records out
697618432 bytes (698 MB) copied, 10.2284 s, 68.2 MB/s
dd: reading `/oradata/orcl/system01.dbf': Input/output error
85158+1 records in
85158+1 records out
697618432 bytes (698 MB) copied, 10.2296 s, 68.2 MB/s
dd: reading `/oradata/orcl/system01.dbf': Input/output error
85158+1 records in
85158+1 records out
697618432 bytes (698 MB) copied, 10.2309 s, 68.2 MB/s
dd: reading `/oradata/orcl/system01.dbf': Input/output error
85170+1 records in
85170+1 records out
697716736 bytes (698 MB) copied, 11.7563 s, 59.3 MB/s
dd: reading `/oradata/orcl/system01.dbf': Input/output error
85170+1 records in
85170+1 records out
697716736 bytes (698 MB) copied, 13.3038 s, 52.4 MB/s
93431+1 records in
93431+1 records out
765390848 bytes (765 MB) copied, 18.2578 s, 41.9 MB/s

这个文件的io错误明显比较多,无法直接使用以前的dd方法较好地恢复数据,只能通过linux平台的一些io工具修复文件(或者直接把磁盘挂载到win上通过工具处理)。处理之后把文件下载到win机器上检查,效果不错,只有17个坏块

C:\Users\XIFENFEI>dbv file=f:/11.2.0.1/system01.dbf

DBVERIFY: Release 10.2.0.3.0 - Production on 星期日 2月 24 22:46:59 2019

Copyright (c) 1982, 2005, Oracle.  All rights reserved.

DBVERIFY - 开始验证: FILE = f:/11.2.0.1/system01.dbf
页 83871 标记为损坏
Corrupt block relative dba: 0x0041479f (file 1, block 83871)
Bad header found during dbv:
Data in bad block:
 type: 229 format: 5 rdba: 0xe5e5e5e5
 last change scn: 0xe5e5.e5e5e5e5 seq: 0xe5 flg: 0xe5
 spare1: 0xe5 spare2: 0xe5 spare3: 0xe5e5
 consistency value in tail: 0xe5e5e5e5
 check value in block header: 0xe5e5
 computed block checksum: 0x0

页 83872 标记为损坏
Corrupt block relative dba: 0x004147a0 (file 1, block 83872)
Bad header found during dbv:
Data in bad block:
 type: 229 format: 5 rdba: 0xe5e5e5e5
 last change scn: 0xe5e5.e5e5e5e5 seq: 0xe5 flg: 0xe5
 spare1: 0xe5 spare2: 0xe5 spare3: 0xe5e5
 consistency value in tail: 0xe5e5e5e5
 check value in block header: 0xe5e5
 computed block checksum: 0x0

页 83873 标记为损坏
Corrupt block relative dba: 0x004147a1 (file 1, block 83873)
Bad header found during dbv:
Data in bad block:
 type: 229 format: 5 rdba: 0xe5e5e5e5
 last change scn: 0xe5e5.e5e5e5e5 seq: 0xe5 flg: 0xe5
 spare1: 0xe5 spare2: 0xe5 spare3: 0xe5e5
 consistency value in tail: 0x759c0601
 check value in block header: 0xe5e5
 computed block checksum: 0xddc6

页 85161 流入 - 很可能是介质损坏
Corrupt block relative dba: 0x00414ca9 (file 1, block 85161)
Fractured block found during dbv:
Data in bad block:
 type: 6 format: 2 rdba: 0x00414ca9
 last change scn: 0x0000.0ce20ac2 seq: 0x2 flg: 0x04
 spare1: 0x0 spare2: 0x0 spare3: 0x0
 consistency value in tail: 0xe5e5e5e5
 check value in block header: 0x47f0
 computed block checksum: 0xc3ab

页 85162 标记为损坏
Corrupt block relative dba: 0x00414caa (file 1, block 85162)
Bad header found during dbv:
Data in bad block:
 type: 229 format: 5 rdba: 0xe5e5e5e5
 last change scn: 0xe5e5.e5e5e5e5 seq: 0xe5 flg: 0xe5
 spare1: 0xe5 spare2: 0xe5 spare3: 0xe5e5
 consistency value in tail: 0xe5e5e5e5
 check value in block header: 0xe5e5
 computed block checksum: 0x0

页 85163 标记为损坏
Corrupt block relative dba: 0x00414cab (file 1, block 85163)
Bad header found during dbv:
Data in bad block:
 type: 229 format: 5 rdba: 0xe5e5e5e5
 last change scn: 0xe5e5.e5e5e5e5 seq: 0xe5 flg: 0xe5
 spare1: 0xe5 spare2: 0xe5 spare3: 0xe5e5
 consistency value in tail: 0xe5e5e5e5
 check value in block header: 0xe5e5
 computed block checksum: 0x0

页 85164 标记为损坏
Corrupt block relative dba: 0x00414cac (file 1, block 85164)
Bad header found during dbv:
Data in bad block:
 type: 229 format: 5 rdba: 0xe5e5e5e5
 last change scn: 0xe5e5.e5e5e5e5 seq: 0xe5 flg: 0xe5
 spare1: 0xe5 spare2: 0xe5 spare3: 0xe5e5
 consistency value in tail: 0xe5e5e5e5
 check value in block header: 0xe5e5
 computed block checksum: 0x0

页 85165 标记为损坏
Corrupt block relative dba: 0x00414cad (file 1, block 85165)
Bad header found during dbv:
Data in bad block:
 type: 229 format: 5 rdba: 0xe5e5e5e5
 last change scn: 0xe5e5.e5e5e5e5 seq: 0xe5 flg: 0xe5
 spare1: 0xe5 spare2: 0xe5 spare3: 0xe5e5
 consistency value in tail: 0xe5e5e5e5
 check value in block header: 0xe5e5
 computed block checksum: 0x0

页 85166 标记为损坏
Corrupt block relative dba: 0x00414cae (file 1, block 85166)
Bad header found during dbv:
Data in bad block:
 type: 229 format: 5 rdba: 0xe5e5e5e5
 last change scn: 0xe5e5.e5e5e5e5 seq: 0xe5 flg: 0xe5
 spare1: 0xe5 spare2: 0xe5 spare3: 0xe5e5
 consistency value in tail: 0xe5e5e5e5
 check value in block header: 0xe5e5
 computed block checksum: 0x0

页 85167 标记为损坏
Corrupt block relative dba: 0x00414caf (file 1, block 85167)
Bad header found during dbv:
Data in bad block:
 type: 229 format: 5 rdba: 0xe5e5e5e5
 last change scn: 0xe5e5.e5e5e5e5 seq: 0xe5 flg: 0xe5
 spare1: 0xe5 spare2: 0xe5 spare3: 0xe5e5
 consistency value in tail: 0xe5e5e5e5
 check value in block header: 0xe5e5
 computed block checksum: 0x0

页 85177 流入 - 很可能是介质损坏
Corrupt block relative dba: 0x00414cb9 (file 1, block 85177)
Fractured block found during dbv:
Data in bad block:
 type: 6 format: 2 rdba: 0x00414cb9
 last change scn: 0x0000.0ce55ebf seq: 0x1 flg: 0x06
 spare1: 0x0 spare2: 0x0 spare3: 0x0
 consistency value in tail: 0xe5e5e5e5
 check value in block header: 0x54c9
 computed block checksum: 0x5ce5

页 85178 标记为损坏
Corrupt block relative dba: 0x00414cba (file 1, block 85178)
Bad header found during dbv:
Data in bad block:
 type: 229 format: 5 rdba: 0xe5e5e5e5
 last change scn: 0xe5e5.e5e5e5e5 seq: 0xe5 flg: 0xe5
 spare1: 0xe5 spare2: 0xe5 spare3: 0xe5e5
 consistency value in tail: 0xe5e5e5e5
 check value in block header: 0xe5e5
 computed block checksum: 0x0

页 85179 标记为损坏
Corrupt block relative dba: 0x00414cbb (file 1, block 85179)
Bad header found during dbv:
Data in bad block:
 type: 229 format: 5 rdba: 0xe5e5e5e5
 last change scn: 0xe5e5.e5e5e5e5 seq: 0xe5 flg: 0xe5
 spare1: 0xe5 spare2: 0xe5 spare3: 0xe5e5
 consistency value in tail: 0xe5e5e5e5
 check value in block header: 0xe5e5
 computed block checksum: 0x0

页 85180 标记为损坏
Corrupt block relative dba: 0x00414cbc (file 1, block 85180)
Bad header found during dbv:
Data in bad block:
 type: 229 format: 5 rdba: 0xe5e5e5e5
 last change scn: 0xe5e5.e5e5e5e5 seq: 0xe5 flg: 0xe5
 spare1: 0xe5 spare2: 0xe5 spare3: 0xe5e5
 consistency value in tail: 0xe5e5e5e5
 check value in block header: 0xe5e5
 computed block checksum: 0x0

页 85181 标记为损坏
Corrupt block relative dba: 0x00414cbd (file 1, block 85181)
Bad header found during dbv:
Data in bad block:
 type: 229 format: 5 rdba: 0xe5e5e5e5
 last change scn: 0xe5e5.e5e5e5e5 seq: 0xe5 flg: 0xe5
 spare1: 0xe5 spare2: 0xe5 spare3: 0xe5e5
 consistency value in tail: 0xe5e5e5e5
 check value in block header: 0xe5e5
 computed block checksum: 0x0

页 85182 标记为损坏
Corrupt block relative dba: 0x00414cbe (file 1, block 85182)
Bad header found during dbv:
Data in bad block:
 type: 229 format: 5 rdba: 0xe5e5e5e5
 last change scn: 0xe5e5.e5e5e5e5 seq: 0xe5 flg: 0xe5
 spare1: 0xe5 spare2: 0xe5 spare3: 0xe5e5
 consistency value in tail: 0xe5e5e5e5
 check value in block header: 0xe5e5
 computed block checksum: 0x0

页 85183 标记为损坏
Corrupt block relative dba: 0x00414cbf (file 1, block 85183)
Bad header found during dbv:
Data in bad block:
 type: 229 format: 5 rdba: 0xe5e5e5e5
 last change scn: 0xe5e5.e5e5e5e5 seq: 0xe5 flg: 0xe5
 spare1: 0xe5 spare2: 0xe5 spare3: 0xe5e5
 consistency value in tail: 0xe5e5e5e5
 check value in block header: 0xe5e5
 computed block checksum: 0x0



DBVERIFY - 验证完成

检查的页总数: 93440
处理的页总数 (数据): 64294
失败的页总数 (数据): 0
处理的页总数 (索引): 12616
失败的页总数 (索引): 0
处理的页总数 (其它): 3111
处理的总页数 (段)  : 0
失败的总页数 (段)  : 0
空的页总数: 13402
标记为损坏的总页数: 17
流入的页总数: 2
最高块 SCN            : 1073748415 (0.1073748415)

C:\Users\XIFENFEI>

经过一系列恢复操作,数据库被强制打开,但数据库后台报ORA-7445 kkogbro错误

Completed: alter database open resetlogs upgrade
Sun Feb 24 18:06:36 2019
MMON started with pid=15, OS id=9032 
Sun Feb 24 18:07:50 2019
Errors in file d:\app\diag\rdbms\orcl\orcl\trace\orcl_ora_8336.trc:
Sun Feb 24 18:07:52 2019
Trace dumping is performing id=[cdmp_20190224180752]
Sun Feb 24 18:09:42 2019
alter tablespace temp add tempfile 'f:/11.2.0.1/temp01.dbf' size 128m autoextend on
Completed: alter tablespace temp add tempfile 'f:/11.2.0.1/temp01.dbf' size 128m autoextend on
Exception [type: ACCESS_VIOLATION, UNABLE_TO_READ] [ADDR:0x166] [PC:0x38E41AD, kkogbro()+497]
ERROR: Unable to normalize symbol name for the following short stack (at offset 199):
dbgexProcessError()+193<-dbgeExecuteForError()+65<-dbgePostErrorKGE()+1726<-dbkePostKGE_kgsf()+
75<-kgeade()+560<-kgerev()+125<-kgerec5()+60<-sss_xcpt_EvalFilterEx()+1869<-
sss_xcpt_EvalFilter()+174<-.1.6_8+59<-0000000077207388<-000000007721BF7D<-
00000000771F043A<-000000007721B61E<-kkogbro()+497<-kkogjro()+99<-kkojnp()
+10299<-kkocnp()+78<-kkooqb()+1549<-kkoqbc()+2474<-apakkoqb()+200<-
apaqbdDescendents()+496<-apaqbdList()+79<-apaqbdDescendents()+795<-
apaqbdList()+79<-apaqbd()+17<-apadrv()+818<-opitca()+2518<-kksLoadChild()+9008
<-kxsGetRuntimeLock()+2320<-kksfbc()+15225<-kkspbd0()+669<-kksParseCursor()+741
<-opiosq0()+2538<-opipls()+12841<-opiodr()+1662<-rpidrus()+862<-rpidru()+154
<-rpiswu2()+2757<-rpidrv()+6105<-psddr0()+614<-psdnal()+510<-pevm_EXECC()+365
<-pfrinstr_EXECC()+90<-pfrrun_no_tool()+65<-pfrrun()+1241<-plsql_run()+875
<-peicnt()+329<-kkxexe()+616<-opiexe()+20006
Errors in file d:\app\diag\rdbms\orcl\orcl\trace\orcl_ora_8336.trc  (incident=2540):
ORA-07445: 出现异常错误: 核心转储 [kkogbro()+497] [ACCESS_VIOLATION] [ADDR:0x166] [PC:0x38E41AD] [UNABLE_TO_READ] []
Incident details in: d:\app\diag\rdbms\orcl\orcl\incident\incdir_2540\orcl_ora_8336_i2540.trc

通过分析trace文件,确认是和坏块有关系,对于上述坏块进行处理之后,数据正常导出.

ORA-704 ORA-604 ORA-942 恢复

Posted on 2018 年 12 月 11 日 by 惜分飞

接到客户请求,oracle数据库停机重启维护之后,无法正常启动,请求我们给予协助
数据库启动报ORA-00704 ORA-00604 ORA-00942错误

SQL> select * from v$version;

BANNER
--------------------------------------------------------------------------------

Oracle Database 11g Enterprise Edition Release 11.2.0.1.0 - Production
PL/SQL Release 11.2.0.1.0 - Production
CORE    11.2.0.1.0      Production
TNS for 32-bit Windows: Version 11.2.0.1.0 - Production
NLSRTL Version 11.2.0.1.0 - Production

SQL> startup open
ORACLE 例程已经启动。

Total System Global Area 1288949760 bytes
Fixed Size                  1376520 bytes
Variable Size             377491192 bytes
Database Buffers          897581056 bytes
Redo Buffers               12500992 bytes
数据库装载完毕。
ORA-01092: ORACLE instance terminated. Disconnection forced
ORA-00704: bootstrap process failure
ORA-00604: error occurred at recursive SQL level 1
ORA-00942: table or view does not exist
进程 ID: 2756
会话 ID: 5 序列号: 9

alert日志报错

SMON: enabling cache recovery
Errors in file d:\app\administrator\diag\rdbms\xff\xff\trace\xff_ora_2756.trc:
ORA-00704: 引导程序进程失败
ORA-00604: 递归 SQL 级别 1 出现错误
ORA-00942: 表或视图不存在
Errors in file d:\app\administrator\diag\rdbms\xff\xff\trace\xff_ora_2756.trc:
ORA-00704: 引导程序进程失败
ORA-00604: 递归 SQL 级别 1 出现错误
ORA-00942: 表或视图不存在
Error 704 happened during db open, shutting down database
USER (ospid: 2756): terminating the instance due to error 704
Instance terminated by USER, pid = 2756
ORA-1092 signalled during: ALTER DATABASE OPEN...
opiodr aborting process unknown ospid (2756) as a result of ORA-1092
Fri Nov 30 12:51:26 2018
ORA-1092 : opitsk aborting process

根据这些年的恢复经验,以前处理过的类似错误案例主要有:
ORA-01092 ORA-00704 ORA-00942
Oracle 11g丢失access$恢复方法
 11.1.0.7版本也会出现access$表丢失导致数据库无法启动
总结主要有两类:一种是由于某种bug导致access$表丢失的故障,另一种是由于坏块导致数据库核心基表损坏引起的故障。下面对这个库进行分析

dbv 检查坏块

D:\APP\ADMINISTRATOR\ORADATA\ORCL> dbv file=system01.dbf

DBVERIFY: Release 10.2.0.3.0 - Production on 星期五 11月 30 15:11:24 2018

Copyright (c) 1982, 2005, Oracle.  All rights reserved.

DBVERIFY - 开始验证: FILE = system01.dbf


DBVERIFY - 验证完成

检查的页总数: 93440
处理的页总数 (数据): 61979
失败的页总数 (数据): 0
处理的页总数 (索引): 13560
失败的页总数 (索引): 0
处理的页总数 (其它): 3067
处理的总页数 (段)  : 0
失败的总页数 (段)  : 0
空的页总数: 14834
标记为损坏的总页数: 0
流入的页总数: 0
最高块 SCN            : 659587683 (0.659587683)

通过dbv检查数据库,确定system没有坏块
10046跟踪数据库启动

SQL> startup mount;
ORACLE 例程已经启动。

Total System Global Area 1288949760 bytes
Fixed Size                  1376520 bytes
Variable Size             377491192 bytes
Database Buffers          897581056 bytes
Redo Buffers               12500992 bytes
数据库装载完毕。

SQL> oradebug setmypid
已处理的语句
SQL> alter session set db_file_multiblocK_read_count=1;

会话已更改。

SQL> ALTER SESSION SET EVENTS '704 trace name errorstack level 3';

会话已更改。

SQL> oradebug EVENT 10046 TRACE NAME CONTEXT FOREVER, LEVEL 12
已处理的语句
SQL> oradebug TRACEFILE_NAME
d:\app\administrator\diag\rdbms\xff\xff\trace\xff_ora_1132.trc

--trace信息
PARSE ERROR #3:len=208 dep=1 uid=0 oct=9 lid=0 tim=5838844893 err=942
CREATE UNIQUE INDEX I_OBJ1 ON OBJ$(OBJ#,OWNER#,TYPE#) PCTFREE 10 INITRANS 2 MAXTRANS 255 
STORAGE (  INITIAL 64K NEXT 1024K MINEXTENTS 1 MAXEXTENTS 2147483645
 PCTINCREASE 0 OBJNO 36 EXTENTS (FILE 1 BLOCK 336))

*** 2018-11-30 13:18:07.593
dbkedDefDump(): Starting a non-incident diagnostic dump (flags=0x0, level=3, mask=0x0)
----- Error Stack Dump -----
ORA-00704: 引导程序进程失败
ORA-00604: 递归 SQL 级别 1 出现错误
ORA-00942: 表或视图不存在

通过这一步基本上可以判断是由于obj$表丢失导致数据库创建I_OBJ1 index不成功,从而使得数据库无法正常启动。通过一些技巧修复出来obj$表,尝试启动数据库

SQL> alter database open;
alter database open
*
第 1 行出现错误:
ORA-01092: ORACLE instance terminated. Disconnection forced
ORA-00704: bootstrap process failure
ORA-00600: internal error code, arguments: [4000], [6], [], [], [], [], [], [],
[], [], [], []
进程 ID: 836
会话 ID: 355 序列号: 6027

alert日志报错

Fri Nov 30 15:33:47 2018
SMON: enabling cache recovery
Errors in file d:\app\administrator\diag\rdbms\xff\xff\trace\xff_ora_836.trc  (incident=648694):
ORA-00600: 内部错误代码, 参数: [4000], [6], [], [], [], [], [], [], [], [], [], []
Incident details in: d:\app\administrator\diag\rdbms\xff\xff\incident\incdir_648694\xff_ora_836_i648694.trc
Errors in file d:\app\administrator\diag\rdbms\xff\xff\trace\xff_ora_836.trc:
ORA-00704: 引导程序进程失败
ORA-00600: 内部错误代码, 参数: [4000], [6], [], [], [], [], [], [], [], [], [], []
Errors in file d:\app\administrator\diag\rdbms\xff\xff\trace\xff_ora_836.trc:
ORA-00704: 引导程序进程失败
ORA-00600: 内部错误代码, 参数: [4000], [6], [], [], [], [], [], [], [], [], [], []
Error 704 happened during db open, shutting down database
USER (ospid: 836): terminating the instance due to error 704
Fri Nov 30 15:33:57 2018
Instance terminated by USER, pid = 836
ORA-1092 signalled during: alter database open...
opiodr aborting process unknown ospid (836) as a result of ORA-1092

发现数据库启动报ORA-00704和ORA-00600: internal error code, arguments: [4000], [6], [], [], [], [], [], [],错误,根据以往经验,该问题是由于回滚段异常导致
分析trace信息

Dump continued from file: d:\app\administrator\diag\rdbms\xff\xff\trace\xff_ora_2124.trc
ORA-00600: 内部错误代码, 参数: [4000], [6], [], [], [], [], [], [], [], [], [], []

========= Dump for incident 651102 (ORA 600 [4000]) ========

*** 2018-11-30 15:46:32.125
dbkedDefDump(): Starting incident default dumps (flags=0x2, level=3, mask=0x0)
----- Current SQL Statement for this session (sql_id=6apq2rjyxmxpj) -----
select line#, sql_text from bootstrap$ where obj# != :1

----- Call Stack Trace -----
calling              call     entry                argument values in hex      
location             type     point                (? means dubious value)     
-------------------- -------- -------------------- ----------------------------
_skdstdst()+121      CALLrel  _kgdsdst()           E6B1614 2
_ksedst1()+93        CALLrel  _skdstdst()          E6B1614 0 1 436646 435BE2
                                                   436646
_ksedst()+49         CALLrel  _ksedst1()           0 1
_dbkedDefDump()+367  CALLrel  _ksedst()            0
2                                                  
_ksedmp()+44         CALLrel  _dbkedDefDump()      3 2
_ksfdmp()+56         CALLrel  _ksedmp()            3EB
_dbgexPhaseII()+164  CALLreg  00000000             C378A28 3EB
0                                                  
_dbgexProcessError(  CALLrel  _dbgexPhaseII()      E2B0454 E2B6E50 E6B6078
)+2061                                             
_dbgeExecuteForErro  CALLrel  _dbgexProcessError(  E2B0454 E2B6E50 1 0 E2B0454
r()+43                        )                    E2B6E50
__VInfreq__dbgePost  CALLrel  _dbgeExecuteForErro  E2B0454 E2B6E50 0 1 0
ErrorKGE()+260                r()                  
_dbkePostKGE_kgsf()  CALLrel  _dbgePostErrorKGE()  C378A28 E2BD274 258
+56                                                
_kgeade()+299        CALLreg  00000000             C378A28 E2BD274 258
_kgeriv_int()+79     CALLrel  _kgeade()            C378A28 C378B50 E2BD274 258 0
                                                   FA0 0 0 0 1 E6B68A8
_kgeriv()+22         CALLrel  _kgeriv_int()        C378A28 E2BD274 FA0 0 1
                                                   E6B68A8
_kgeasi()+107        CALLrel  _kgeriv()            C378A28 E2BD274 FA0 1 E6B68A8
__VInfreq__ktuGetUs  CALLrel  _kgeasi()            C378A28 E2BD274 FA0 2 1 0 6 0
egDba()+123                                        
_ktrgcm()+5147       CALLrel  _ktuGetUsegDba()     6 E6B6E78 0 0 E6B6F48 0
_ktrget2()+596       CALLrel  _ktrgcm()            F9FCEAC
_kdst_fetch()+816    CALLrel  _ktrget2()           F9FCEAC F9FCE24 303 0
_kdstf11001010000km  CALLrel  _kdst_fetch()        1 F9FCEA8 E6B71C8
()+2806                                            
_kdsttgr()+5944      CALLrel  _kdstf11001010000km  F9FCEA8 0 557B044C F9FCDF8
                              ()                   2F0C3A6 E6B7894
_qertbFetch()+767    CALLrel  _kdsttgr()           F9FCEA8 0 557B044C F9FCDF8
                                                   557B0498 2F0C3A6 E6B7894 1
_opifch2()+2729      CALLptr  00000000             E6E7228 0 0 2 26180001
_opifch()+53         CALLrel  _opifch2()           89 5 E6B7A04
_opiodr()+1248       CALLreg  00000000             5 2 E6B81FC
_rpidrus()+186       CALLrel  _opiodr()            5 2 E6B81FC 2
_rpidru()+90         CALLrel  _rpidrus()           E6B7D58
_rpiswu2()+557       CALLrel  _rpidru()            E6B813C
_rpidrv()+1242       CALLrel  _rpiswu2()           6D9A295C 0 6D9A29A8 2 E6B8184
                                                   0 6D9A2A28 0 0 544632 5448D6
                                                   E6B813C 8
_rpifch()+43         CALLrel  _rpidrv()            2 5 E6B81FC 8
_kqlbebs()+1213      CALLrel  _rpifch()            2 2 2 F013232 FA0 1 0 E6B8634
                                                   0 0 0 0 0
_kqlblfc()+175       CALLrel  _kqlbebs()           0 E6BBBD4
_adbdrv()+16992      CALLrel  _kqlblfc()           0 E6BBBD4
_opiexe()+13594      CALLrel  _adbdrv()            4A 6E6CBC48 6DDEDB6C E6BBD68
                                                   6D60697 6E6CBC48
_opiosq0()+6248      CALLrel  _opiexe()            4 0 E6BC734
_kpooprx()+277       CALLrel  _opiosq0()           3 E E6BC9A0 A4 0
_kpoal8()+632        CALLrel  _kpooprx()           E6BF0A4 E6BD420 1B 1 0 A4
_opiodr()+1248       CALLreg  00000000             5E 1C E6BF0A0
_ttcpip()+1051       CALLreg  00000000             5E 1C E6BF0A0 0
_opitsk()+1404       CALL???  00000000             C3832A8 5E E6BF0A0 0 E6BED30
                                                   E6BF1CC 53E52E 0 E6BF1F8
_opiino()+980        CALLrel  _opitsk()            0 0
_opiodr()+1248       CALLreg  00000000             3C 4 E6BFBF4
_opidrv()+1201       CALLrel  _opiodr()            3C 4 E6BFBF4 0
_sou2o()+55          CALLrel  _opidrv()            3C 4 E6BFBF4
_opimai_real()+124   CALLrel  _sou2o()             E6BFC04 3C 4 E6BFBF4
_opimai()+125        CALLrel  _opimai_real()       2 E6BFC2C
_OracleThreadStart@  CALLrel  _opimai()            2 E6BFF6C 7C9BA7F4 E6BFC34 0
4()+830                                            E6BFD04
7C82484C             CALLreg  00000000             E5BFF9C 0 0 E5BFF9C 0 E6BFFC4
00000000             CALL???  00000000             
 

--------------------- Binary Stack Dump ---------------------


Block header dump:  0x0040020b
 Object id on Block? Y
 seg/obj: 0x3b  csc: 0x00.27508136  itc: 1  flg: O  typ: 1 - DATA
     fsl: 0  fnx: 0x0 ver: 0x01
 
 Itl           Xid                  Uba         Flag  Lck        Scn/Fsc
0x01   0x0006.015.0005e9e9  0x00c0052c.916f.10  --U-    1  fsc 0x0000.27508263

报错比较明显该block比较异常,通过bbed修改相关block信息,相关处理参考:
重现ORA-600 4000异常
 通过bbed解决ORA-00600[4000]案例
 记录一次ORA-600 4000数据库故障恢复
 ORACLE 8.1.7 数据库ORA-600 4000故障恢复

处理之后数据库正常open

Fri Nov 30 15:57:34 2018
SMON: enabling cache recovery
Successfully onlined Undo Tablespace 2.
Verifying file header compatibility for 11g tablespace encryption..
Verifying 11g file header compatibility for tablespace encryption completed
SMON: enabling tx recovery
Database Characterset is ZHS16GBK
No Resource Manager plan active
replication_dependency_tracking turned off (no async multimaster replication found)
Fri Nov 30 15:57:38 2018
Starting background process QMNC
Fri Nov 30 15:57:38 2018
QMNC started with pid=23, OS id=1152 
Completed: alter database open

ALERT: Database Corruption ORA-600 ORA-7445 errors after applying AIX SP patches – AIX 6.1.9.8 or AIX 7.1.3.8 or AIX 7.1.4.3 or AIX 7.2.0.3 or AIX 7.2.1.0, 01

Posted on 2018 年 06 月 16 日 by 惜分飞

APPLIES TO:

Oracle Database – Enterprise Edition – Version 11.2.0.3 to 12.2.0.1 [Release 11.2 to 12.2]
IBM AIX on POWER Systems (64-bit)
A problem has been discovered in the latest SP patches for IBM AIX 6.1 and 7.1 (SP 08 and SP 03) on systems running Oracle 11.2.0.3, 11.2.0.4, 12.1, or 12.2. It causes ORA-600 errors and possible database corruption.

upgrade from AIX 6.1.9.7 to SP08
upgrade from AIX 7.1.4.2 to SP03
or running on one of the oslevels listed below in this note.

This is only known to impact Oracle 11.2.0.3.x, 11.2.0.4.x, 12.1.0.2, or 12.2.0.1 on AIX platforms. It has been observed on various Oracle PSU versions.
The symptoms observed so far are ORA-600 memory related failures with examples below.
Additionally, Redo log corruption has been observed in at least two cases.

DESCRIPTION

Database Corruption and/or ORA-600 ORA-7445 errors after applying IBM AIX SP patches – After update from AIX 6.1.9.7 to SP08 or AIX 7.1.4.2 to SP03 (note that the earlier service packs, SP 07 and SP 02, are not impacted).

OCCURRENCE

The only changes were upgrades to the latest IBM SP patches.

upgraded from AIX 6.1.9.7 to SP08 –> SP08 has the problem.
upgraded from AIX 7.1.4.2 to SP03 –> SP03 has the problem.

To check for AIX patch levels that are exposed to this risk, run the following command and look for any of the following:

# oslevel -s

If any of the following are listed, exposure to this problem exists:

6100-09-08
7100-03-08
7100-04-03
7200-00-03
7200-01-00
7200-01-01

SYMPTOMS

The following ORA-600 errors have been observed. Note that not all errors are needed, and not all customers have seen all these errors.

=========================================================================================
ORA-00600: internal error code, arguments: [kkoipt:invalid aptyp], [0], [0], [], [], [], [], [], [], [], [], []
Optimizer – Maps the structures from memory
=========================================================================================
ORA-00600: internal error code, arguments: [kghssgai2], [1], [32], [], [], [], [], [], [], [], [], []
–looks to be pga related allocations
Generic memory Heap manager -we can’t have both a heap and an allocation function passed in to us
=========================================================================================
ORA-00600: internal error code, arguments: [qkkAssignKey:1], [], [], [], [], [], [], [], [], [], [], []
qkkAssignKey – copy keys from source to destination key
=========================================================================================
ORA-00600: internal error code, arguments: [kclgclks_3], [454], [2431642561], [], [], [], [], [], [], [], [], []
kclgclks – CR Server request
=========================================================================================
ORA-00600: internal error code, arguments: [kkqvmRmViewFromLst1], [], [], [], [], [], [], [], [], [], [], []
View Merging – list management
=========================================================================================
ORA-00600: internal error code, arguments: [kghstack_underflow_internal_1], [0x082024000], [rpi role space], [], [], [], [], [], [], [], [], []
shared heap manager Stack segment underflow, failure to follow stack discipline.
assert no previous chunk in this segment
=========================================================================================
ORA-00600: internal error code, arguments: [qerghFetch.y], [], [], [], [], [], [], [], [], [], [], []
Implements hash aggregation for query source
=========================================================================================
ORA-00600: internal error code, arguments: [qeshQBNextLoad.1], [], [], [], [], [], [], [], [], [], [], []
Hash Table Infrastructure -get Next buffer during Load
=========================================================================================
ORA-00600: internal error code, arguments: [qkshtQBGet:1], [], [], [], [], [], [], [], [], [], [], []
gets memory pointer for a query block.
Make sure the query block pointer is not NULL
=========================================================================================
ORA-00600: internal error code, arguments: [qeshIHBuildOnPartition block missed], [], [], [], [], [], [], [], [], [], [], []
Hash Table Infrastructure
update the partition at the end.
=========================================================================================
ORA-00600: internal error code, arguments: [kghssgfr2], [1]
=========================================================================================
ORA-07445: exception encountered: core dump [PC:0x0] [SIGILL] [ADDR:0x0] [PC:0x0] [Illegal opcode]
=========================================================================================
ORA-00600 [kkogbro: no kkoaptyp]
=========================================================================================
ORA-00600: internal error code, arguments: [kewrose_1], [600]
========================================================================================
ORA-00600: internal error code, arguments: [1868], [0x000000000], [], [], [], [], [], [], [], [], [], []

Core dumps are also possible.

—————

Redo log corruption with checksum error has also been observed.

Two known examples below:

example 1:

Alert.log messages:

ORA-00368: checksum error in redo log block
ORA-00353: log corruption near block 73804 change 8112409541614 time 12/07/2016 07:12:25
ORA-00334: archived log: ‘/dev/rredo13’
ORA-07445: exception encountered: core dump [pkrdi()+780] [SIGSEGV] [ADDR:0x0] [PC:0x10367B26C] [Invalid permissions for mapped object] []

—————

There have been also transient database block corruptions or control file block corruption with checksum errors in the database where a reread finds valid data.

example 2 (transient database block corruption with checksum error):

Corrupt block relative dba: 0x5a066b2f (file 360, block 420655)
Bad check value found during buffer read
Data in bad block:
type: 6 format: 2 rdba: 0x5a066b2f
last change scn: 0x00cc.6a826294 seq: 0x1 flg: 0x06
spare1: 0x0 spare2: 0x0 spare3: 0x0
consistency value in tail: 0x62940601
check value in block header: 0x9e7d
computed block checksum: 0x0 —> 0x0 means that checksum is good when printing the error message (transient problem)
Reading datafile ‘Datafile name’ for corruption at rdba: 0x5a066b2f (file 360, block 420655)
Reread (file 360, block 420655) found valid data
Hex dump of (file 360, block 420655) in trace file ….
Repaired corruption at (file 360, block 420655)

example 3 (transient control file corruption with checksum error):

Hex dump of (file 0, block 1) in trace file …
Corrupt block relative dba: 0x00000001 (file 0, block 1)
Bad check value found during control file header read
Data in bad block:
type: 21 format: 2 rdba: 0x00000001
last change scn: 0x0000.00000000 seq: 0x1 flg: 0x04
spare1: 0x0 spare2: 0x0 spare3: 0x0
consistency value in tail: 0x00001501
check value in block header: 0xca35
computed block checksum: 0x0 —> 0x0 means that checksum is good when printing the error message (transient problem)
Errors in file ..:
ORA-00202: control file: ‘/oracle/dbs/control_01.ctl’
Errors in file …
ORA-00227: corrupt block detected in control file: (block 1, # blocks 1)
ORA-00202: control file: ‘/oracle/dbs/control_01.ctl’

WORKAROUND

There is no workaround to avoid the problem, but if log corruption is encountered, one possible workaround is to clear the unarchived redo log. The fix is to roll back the IBM SP or apply the updated fixes.

Syntax to clear logfile:

alter database clear unarchived logfile group <integer>;
alter database clear unarchived logfile '<filename>';

PATCHES

The fix is now ready from IBM

It can be downloaded for the above releases via:

ftp://aix.software.ibm.com/aix/ifixes/

Affected AIX Levels Fixed In iFix / APAR (ftp://aix.software.ibm.com/aix/ifixes/)
6100-09-08 6100-09-09 IV93840
7100-03-08 7100-03-09 IV93884
7100-04-03 7100-04-04 IV93845
7200-00-03 7200-00-04 IV93883
7200-01-01 7200-01-02 IV93885

The fix will be included in the next AIX Service Packs to be released.

IBM HIPER APAR
Abstract: PROBLEMS CAN OCCUR WITH THREAD_CPUTIME AND THREAD_CPUTIME_FAST

This APAR corrects an issue with system call thread_cputime_self with floating point registers which is exposed by Oracle Database 11gR2.

PROBLEM SUMMARY:
The thread_cputime or thread_cputime_fast interfaces can
cause invalid data in the FP/VMX/VSX registers if the thread
page faults in this function

For more information see the following from IBM:

http://www-01.ibm.com/support/docview.wss?uid=isg1SSRVPOAIX71HIPER170303-1247

参考：ALERT: Database Corruption ORA-600 ORA-7445 errors after applying AIX SP patches – AIX 6.1.9.8 or AIX 7.1.3.8 or AIX 7.1.4.3 or AIX 7.2.0.3 or AIX 7.2.1.0, 01 (Doc ID 2237498.1)