ext3/ext4 superblock recovery

创建ext4文件系统

[root@localhost ~]# mkfs.ext4 /dev/sdb1
mke2fs 1.42.9 (28-Dec-2013)
Filesystem label=
OS type: Linux
Block size=4096 (log=2)
Fragment size=4096 (log=2)
Stride=0 blocks, Stripe width=0 blocks
1310720 inodes, 5242624 blocks
262131 blocks (5.00%) reserved for the super user
First data block=0
Maximum filesystem blocks=2153775104
160 block groups
32768 blocks per group, 32768 fragments per group
8192 inodes per group
Superblock backups stored on blocks: 
        32768, 98304, 163840, 229376, 294912, 819200, 884736, 1605632, 2654208, 
        4096000

Allocating group tables: done                            
Writing inode tables: done                            
Creating journal (32768 blocks): done
Writing superblocks and filesystem accounting information: done   

[root@localhost ~]# mkdir /sdb
[root@localhost ~]# mount /dev/sdb1 /sdb
[root@localhost ~]# df -h
Filesystem           Size  Used Avail Use% Mounted on
/dev/mapper/ol-root   36G  4.0G   32G  12% /
devtmpfs             1.8G     0  1.8G   0% /dev
tmpfs                1.8G     0  1.8G   0% /dev/shm
tmpfs                1.8G  8.9M  1.8G   1% /run
tmpfs                1.8G     0  1.8G   0% /sys/fs/cgroup
/dev/sda1            497M  195M  303M  40% /boot
tmpfs                369M     0  369M   0% /run/user/0
/dev/sdb1             20G   45M   19G   1% /sdb

准备测试数据

[root@localhost sdb]# cd /etc/sysctl.d/
[root@localhost sysctl.d]# ls
99-sysctl.conf
[root@localhost sysctl.d]# cp 99-sysctl.conf /sdb
[root@localhost sysctl.d]# more 99-sysctl.conf 
# System default settings live in /usr/lib/sysctl.d/00-system.conf.
# To override those settings, enter new settings here, or in an /etc/sysctl.d/<name>.conf file
#
# For more information, see sysctl.conf(5) and sysctl.d(5).

破坏ext4文件系统

[root@localhost ~]#  dd if=/dev/zero of=/dev/sdb1 bs=1024 count=5
5+0 records in
5+0 records out
5120 bytes (5.1 kB) copied, 0.00270838 s, 1.9 MB/s
[root@localhost ~]# mount /dev/sdb1 /sdb
mount: unknown filesystem type '(null)'

日志报错

[ 8868.362628] sd 32:0:1:0: [sdb] Cache data unavailable
[ 8868.362632] sd 32:0:1:0: [sdb] Assuming drive cache: write through
[ 8868.363714]  sdb: sdb1
[ 8868.390297] sd 32:0:1:0: [sdb] Cache data unavailable
[ 8868.390301] sd 32:0:1:0: [sdb] Assuming drive cache: write through
[ 8868.391462]  sdb: sdb1
[ 8900.130143] EXT4-fs (sdb1): mounted filesystem with ordered data mode. Opts: (null)
[ 8900.130163] SELinux: initialized (dev sdb1, type ext4), uses xattr
[ 8902.803966] sdb1: WRITE SAME failed. Manually zeroing.
1
fsck修复
1
[root@localhost ~]# fsck -t ext4 /dev/sdb1
fsck from util-linux 2.23.2
e2fsck 1.42.9 (28-Dec-2013)
ext2fs_open2: Bad magic number in super-block
fsck.ext4: Superblock invalid, trying backup blocks...
/dev/sdb1 was not cleanly unmounted, check forced.
Pass 1: Checking inodes, blocks, and sizes
Pass 2: Checking directory structure
Pass 3: Checking directory connectivity
Pass 4: Checking reference counts
Pass 5: Checking group summary information
Free blocks count wrong for group #1 (31740, counted=31739).
Fix<y>? yes
Free blocks count wrong (5116302, counted=5116301).
Fix<y>? yes
Free inodes count wrong for group #0 (8181, counted=8180).
Fix<y>? yes
Free inodes count wrong (1310709, counted=1310708).
Fix<y>? yes

/dev/sdb1: ***** FILE SYSTEM WAS MODIFIED *****
/dev/sdb1: 12/1310720 files (0.0% non-contiguous), 126323/5242624 blocks

测试修复结果

[root@localhost ~]# 
[root@localhost ~]# 
[root@localhost ~]# mount /dev/sdb1 /sdb
[root@localhost ~]# df -h
Filesystem           Size  Used Avail Use% Mounted on
/dev/mapper/ol-root   36G  4.0G   32G  12% /
devtmpfs             1.8G     0  1.8G   0% /dev
tmpfs                1.8G     0  1.8G   0% /dev/shm
tmpfs                1.8G  8.9M  1.8G   1% /run
tmpfs                1.8G     0  1.8G   0% /sys/fs/cgroup
/dev/sda1            497M  195M  303M  40% /boot
tmpfs                369M     0  369M   0% /run/user/0
/dev/sdb1             20G   45M   19G   1% /sdb
[root@localhost ~]# cd /sdb
[root@localhost sdb]# ls
99-sysctl.conf  lost+found
[root@localhost sdb]# more 99-sysctl.conf 
# System default settings live in /usr/lib/sysctl.d/00-system.conf.
# To override those settings, enter new settings here, or in an /etc/sysctl.d/<name>.conf file
#
# For more information, see sysctl.conf(5) and sysctl.d(5).

ext4文件系统修复

[root@localhost ~]# mkfs.ext3 /dev/sdb1
mke2fs 1.42.9 (28-Dec-2013)
Filesystem label=
OS type: Linux
Block size=4096 (log=2)
Fragment size=4096 (log=2)
Stride=0 blocks, Stripe width=0 blocks
1310720 inodes, 5242624 blocks
262131 blocks (5.00%) reserved for the super user
First data block=0
Maximum filesystem blocks=4294967296
160 block groups
32768 blocks per group, 32768 fragments per group
8192 inodes per group
Superblock backups stored on blocks: 
        32768, 98304, 163840, 229376, 294912, 819200, 884736, 1605632, 2654208, 
        4096000

Allocating group tables: done                            
Writing inode tables: done                            
Creating journal (32768 blocks): done
Writing superblocks and filesystem accounting information: done   

[root@localhost ~]# mount /dev/sdb1 /sdb
[root@localhost ~]# df -h
Filesystem           Size  Used Avail Use% Mounted on
/dev/mapper/ol-root   36G  4.0G   32G  12% /
devtmpfs             1.8G     0  1.8G   0% /dev
tmpfs                1.8G     0  1.8G   0% /dev/shm
tmpfs                1.8G  8.9M  1.8G   1% /run
tmpfs                1.8G     0  1.8G   0% /sys/fs/cgroup
/dev/sda1            497M  195M  303M  40% /boot
tmpfs                369M     0  369M   0% /run/user/0
/dev/sdb1             20G   45M   19G   1% /sdb
[root@localhost ~]# dd if=/dev/zero of=/dev/sdb1 bs=1024 count=5
5+0 records in
5+0 records out
5120 bytes (5.1 kB) copied, 0.0138915 s, 369 kB/s
[root@localhost ~]# fsck -t ext3 /dev/sdb1
fsck from util-linux 2.23.2
e2fsck 1.42.9 (28-Dec-2013)
ext2fs_open2: Bad magic number in super-block
fsck.ext3: Superblock invalid, trying backup blocks...
/dev/sdb1 was not cleanly unmounted, check forced.
Pass 1: Checking inodes, blocks, and sizes
Pass 2: Checking directory structure
Pass 3: Checking directory connectivity
Pass 4: Checking reference counts
Pass 5: Checking group summary information

/dev/sdb1: ***** FILE SYSTEM WAS MODIFIED *****
/dev/sdb1: 11/1310720 files (0.0% non-contiguous), 126322/5242624 blocks
[root@localhost ~]# mount /dev/sdb1 /sdb
[root@localhost ~]# df -h
Filesystem           Size  Used Avail Use% Mounted on
/dev/mapper/ol-root   36G  4.0G   32G  12% /
devtmpfs             1.8G     0  1.8G   0% /dev
tmpfs                1.8G     0  1.8G   0% /dev/shm
tmpfs                1.8G  8.9M  1.8G   1% /run
tmpfs                1.8G     0  1.8G   0% /sys/fs/cgroup
/dev/sda1            497M  195M  303M  40% /boot
tmpfs                369M     0  369M   0% /run/user/0
/dev/sdb1             20G   45M   19G   1% /sdb

fsck修复危险性较大,建议先备份对应的分区(dd命令备份分区)然后再处理,有导致分区数据全部或者部分丢失的风险

rm -rf delete datafile recovery

因为人员离职闹得不愉快,系统工程师离职后,由于公司未及时关闭其vpn,数据库服务器(Linux 6.5 Oracle 11.2.0.1)帐号未及时被修改,最后直接上去rm ORACLE_BASE给干掉,悲剧的是ORADATA目录也在里面,更加悲剧的是所有数据文件都在里面.也就是说数据库彻底被删除,而且没有任何备份.朋友咨询了我,让我给予支持.最后比较幸运,文件没有被覆盖,inode都还在,通过extundelete顺利恢复所有数据文件,控制文件,redo文件(extundelete恢复Linux被删除文件),数据库顺利打开,实现0丢失,算是一次完美的恢复

[root@DB1 tmp]# tar xvf extundelete-0.2.4.tar 
extundelete-0.2.4/
extundelete-0.2.4/acinclude.m4
extundelete-0.2.4/missing
extundelete-0.2.4/autogen.sh
extundelete-0.2.4/aclocal.m4
extundelete-0.2.4/configure
extundelete-0.2.4/LICENSE
extundelete-0.2.4/README
extundelete-0.2.4/install-sh
extundelete-0.2.4/config.h.in
extundelete-0.2.4/src/
extundelete-0.2.4/src/extundelete.cc
extundelete-0.2.4/src/block.h
extundelete-0.2.4/src/kernel-jbd.h
extundelete-0.2.4/src/insertionops.cc
extundelete-0.2.4/src/block.c
extundelete-0.2.4/src/cli.cc
extundelete-0.2.4/src/extundelete-priv.h
extundelete-0.2.4/src/extundelete.h
extundelete-0.2.4/src/jfs_compat.h
extundelete-0.2.4/src/Makefile.in
extundelete-0.2.4/src/Makefile.am
extundelete-0.2.4/configure.ac
extundelete-0.2.4/depcomp
extundelete-0.2.4/Makefile.in
extundelete-0.2.4/Makefile.am
[root@DB1 tmp]# cd extundelete-0.2.4
[root@DB1 extundelete-0.2.4]# ./configure 
Configuring extundelete 0.2.4
Writing generated files to disk
[root@DB1 extundelete-0.2.4]# make && make install
make -s all-recursive
Making all in src
Making install in src
  /usr/bin/install -c extundelete '/usr/local/bin'
[root@DB1 extundelete-0.2.4]# df -h
Filesystem      Size  Used Avail Use% Mounted on
/dev/sda3       244G   11G  221G   5% /
tmpfs            16G   72K   16G   1% /dev/shm
/dev/sda1       190M   62M  119M  35% /boot
/dev/sdb1       2.0T   71M  1.9T   1% /home
[root@DB1 extundelete-0.2.4]# umount /dev/sdb1
umount: /home: device is busy.
        (In some cases useful info about processes that use
         the device is found by lsof(8) or fuser(1))
[root@DB1 extundelete-0.2.4]# fuser -m -u /home
/home:                3914c(oracle)  8372c(oracle)
[root@DB1 extundelete-0.2.4]# kill -9 3914
[root@DB1 extundelete-0.2.4]# fuser -m -u /home
/home:                8372c(oracle)
[root@DB1 extundelete-0.2.4]# kill -9 8372
[root@DB1 extundelete-0.2.4]# fuser -m -u /home
[root@DB1 extundelete-0.2.4]# umount /dev/sdb1
[root@DB1 extundelete-0.2.4]# df -h
Filesystem      Size  Used Avail Use% Mounted on
/dev/sda3       244G   11G  221G   5% /
tmpfs            16G   72K   16G   1% /dev/shm
/dev/sda1       190M   62M  119M  35% /boot
[root@DB1 extundelete-0.2.4]# extundelete /dev/sdb1 --restore-all
NOTICE: Extended attributes are not restored.
Loading filesystem metadata ... 16384 groups loaded.
Loading journal descriptors ... 26542 descriptors loaded.
Searching for recoverable inodes in directory / ... 
18896 recoverable inodes found.
Looking through the directory structure for deleted files ... 
2 recoverable inodes still lost.
Unable to restore inode 43778050 (file.43778050): Space has been reallocated.
[root@DB1 extundelete-0.2.4]# ls
acinclude.m4  autogen.sh  config.h.in  config.status  configure.ac  install-sh  Makefile     Makefile.in 
aclocal.m4    config.h    config.log   configure      depcomp       LICENSE     Makefile.am  missing    
[root@DB1 extundelete-0.2.4]# cd RECOVERED_FILES/
[root@DB1 RECOVERED_FILES]# ls
app  file.43778051  oracle  oraInventory
[root@DB1 RECOVERED_FILES]# cd app
[root@DB1 app]# ls
admin  cfgtoollogs  diag  oracle  oradata  orcl  ORCL
[root@DB1 app]# cd oradata
[root@DB1 oradata]# ls
orcl
[root@DB1 oradata]# cd orcl
[root@DB1 orcl]# ls
control01.ctl  redo01.log  redo02.log  redo03.log  sysaux01.dbf  system01.dbf  undotbs01.dbf  users01.dbf
[root@DB1 orcl]# ls -ltr
total 2908776
-rw-r--r--. 1 root root  734011392 Nov 18 02:06 system01.dbf
-rw-r--r--. 1 root root 1069555712 Nov 18 02:06 sysaux01.dbf
-rw-r--r--. 1 root root  120594432 Nov 18 02:06 undotbs01.dbf
-rw-r--r--. 1 root root  887365632 Nov 18 02:06 users01.dbf
-rw-r--r--. 1 root root    9748480 Nov 18 02:06 control01.ctl
-rw-r--r--. 1 root root   52429312 Nov 18 02:06 redo01.log
-rw-r--r--. 1 root root   52429312 Nov 18 02:06 redo02.log
-rw-r--r--. 1 root root   52429312 Nov 18 02:06 redo03.log
[root@DB1 orcl]# 

再次提醒各位:数据库备份重于一切,防天灾的同时还要防人灾,也希望圈子里面以后不要听到类似故障.

multipath实现设备用户组设置

现在的Linux系统中,很多都会使用系统自带的multipath多路径软件,在以前的版本中,我们一般通过multipath+udev或者multipath+rc.local来实现多路径和权限设置,而在redhat 5.3及其以后的版本中multipath就直接可以实现多路径聚合、设备持久化、用户组设置
操作系统版本

[root@rac1 dev]# uname -r
2.6.39-300.26.1.el5uek

[root@rac1 dev]# more /etc/issue
Oracle Linux Server release 5.9
Kernel \r on an \m

fdisk记录

[root@rac1 dev]# fdisk -l 
…………
Disk /dev/sdh: 134.2 GB, 134217728000 bytes
255 heads, 63 sectors/track, 16317 cylinders
Units = cylinders of 16065 * 512 = 8225280 bytes

   Device Boot      Start         End      Blocks   Id  System

Disk /dev/sdi: 33.5 GB, 33554432000 bytes
64 heads, 32 sectors/track, 32000 cylinders
Units = cylinders of 2048 * 512 = 1048576 bytes

   Device Boot      Start         End      Blocks   Id  System

multipath包
检查安装multipath相关包(该版本系统默认安装)

[root@rac1 dev]# rpm -aq|grep mapper
device-mapper-multipath-libs-0.4.9-56.0.3.el5
device-mapper-event-1.02.67-2.el5
device-mapper-1.02.67-2.el5
device-mapper-multipath-0.4.9-56.0.3.el5

获取wwid值

[root@rac1 dev]# /sbin/scsi_id -g -u -s /block/sdh
14f504e46494c45527049754962662d395751372d68356743
[root@rac1 dev]# /sbin/scsi_id -g -u -s /block/sdi
14f504e46494c4552484d486249782d464471382d354f4b58

获取uid和gid

[root@rac1 dev]# id grid
uid=1100(grid) gid=54321(oinstall) groups=54321(oinstall),1020(asmadmin),1021(asmdba)

multipath.conf配置

[root@rac1 dev]# vi /etc/multipath.conf

defaults {
user_friendly_names no
queue_without_daemon no
flush_on_last_del yes
max_fds max
}

blacklist {
devnode "^hd[a-z]"
devnode "^(ram|raw|loop|fd|md|dm-|sr|scd|st)[0-9]*"
devnode "^cciss.*"
}

devices {
        device {
                vendor                  "OPNFILER "
                product                 "LUN"
                path_grouping_policy    group_by_prio
                features             "3 queue_if_no_path pg_init_retries 50"
                getuid_callout          "/sbin/scsi_id -g -u -s /block/%n"
                path_checker            tur
                path_selector           "round-robin 0"
                hardware_handler        "1 alua"
                failback               immediate
                rr_weight              uniform
                rr_min_io             128
        }
}

multipaths {
        multipath {
                wwid                    14f504e46494c45527049754962662d395751372d68356743  #wwid
                alias                   xifenfei128
                uid                     1100                                               #uid
                gid                     1020                                               #gid
        }



        multipath {
                wwid                    14f504e46494c4552484d486249782d464471382d354f4b58   #wwid
                alias                   xifenfei32                                          
                uid                     1100                                                #uid
                gid                     1020                                                #gid
         }
}

启动multipath

[root@rac1 dev]# modprobe dm-multipath 
[root@rac1 dev]# modprobe dm-round-robin 
[root@rac1 dev]# chkconfig multipathd on
[root@rac1 dev]# service multipathd start 
Starting multipathd daemon: [  OK  ]
[root@rac1 dev]# multipath -F
[root@rac1 dev]# multipath -v2
create: xifenfei128 (14f504e46494c45527049754962662d395751372d68356743) undef OPNFILER,VIRTUAL-DISK
size=125G features='0' hwhandler='0' wp=undef
`-+- policy='round-robin 0' prio=1 status=undef
  `- 3:0:0:9  sdh 8:112 undef ready  running
create: xifenfei32 (14f504e46494c4552484d486249782d464471382d354f4b58) undef OPNFILER,VIRTUAL-DISK
size=31G features='0' hwhandler='0' wp=undef
`-+- policy='round-robin 0' prio=1 status=undef
  `- 3:0:0:10 sdi 8:128 undef ready  running

查看生成多路径设备
注意设备名称、组、用户

[root@rac1 dev]# ls -l /dev/mapper/xifenfei*
brw-rw---- 1 grid asmadmin 252, 2 Jan  7 21:21 /dev/mapper/xifenfei128
brw-rw---- 1 grid asmadmin 252, 3 Jan  7 21:21 /dev/mapper/xifenfei32

shell脚本获得extents分布

比较深入看过dba_extents视图的朋友都知道,它得到extent的信息不是通过普通的存储在数据库中的基表获得,而是x$相关的表获得(x$表是数据库启动时候在内存中创建,不存在数据文件中),因为当数据库未正常启动,我们无法直接确定某个block是否在某个对象中.其实关于extent的信息都已经记录在了segment header的block中,通过dump该block记录的rdba信息,未转化为file_id和block_id,这里写shell脚本实现把segment header dump 内容转化为类似dba_extents记录,方便在某些不能open的库中分析某个异常block是否属于某个表

#! /bin/bash

dec2bin(){
  val_16=$1
  ((num=$val_16));
  val=`echo $num`
  local base=$2
  [ $val -eq 0 ] && bin=0
if [ $val -ge $base ]; then
    dec2bin $val $((base*2))
    if [ $val -ge $base ]; then
      val=$(($val-$base))
      bin=${bin}1
    else
      bin=${bin}0
    fi
  fi
  [ $base -eq 1 ] && printf  $bin
}

for i in `grep "length:" $1 |awk '{print $1 $3}'`;
do

rdba=`echo ${i:0:10}`
blocks=`echo ${i:10}`
echo -n "rdba:"$rdba"    "

bin2=`dec2bin $rdba  1`

len=`expr length $bin2`
len_gd=22
len_jg=`expr $len - $len_gd`

file_no_2=`echo ${bin2:0:$len_jg}`
((file_no=2#$file_no_2))
echo -n "file_id:"$file_no"    "

block_no_2=`echo ${bin2:$len_jg}`
((block_no=2#$block_no_2))
echo -n "block_id:"$block_no"    "
echo  "blocks:"$blocks

done;

trace文件中部分信息

  -----------------------------------------------------------------
   0x00400901  length: 7     
   0x00402e10  length: 8     
   0x00402e60  length: 8     
   0x00402e68  length: 8     
   0x00402ea0  length: 8     
   0x00402f20  length: 8     
   0x00402f48  length: 8     
   0x00403050  length: 8     
   0x00403180  length: 8     
   0x00403b38  length: 8     
   0x00404c48  length: 8     
   0x00404c78  length: 8     
   0x00404cf8  length: 8     
   0x00404da8  length: 8     
   0x00404db8  length: 8     
   0x00404de8  length: 8     
   0x00404e80  length: 128   
   0x00405900  length: 128   
   0x00406500  length: 128   
   0x00406980  length: 128   
   0x00407480  length: 128   
   0x00407500  length: 128   
   0x00407680  length: 128   
   0x00407800  length: 128   
   0x00407880  length: 128   
   0x00407a00  length: 128   
   0x00407a80  length: 128   
   0x00407c80  length: 128   
…………

执行结果

[oracle@xifenfei tmp]$ ./get_extent.sh /u01/app/oracle/diag/rdbms/cdb/cdb/trace/cdb_ora_29565.trc
rdba:0x00400901    file_id:1    block_id:2305     blocks:7
rdba:0x00402e10    file_id:1    block_id:11792     blocks:8
rdba:0x00402e60    file_id:1    block_id:11872     blocks:8
rdba:0x00402e68    file_id:1    block_id:11880     blocks:8
rdba:0x00402ea0    file_id:1    block_id:11936     blocks:8
rdba:0x00402f20    file_id:1    block_id:12064     blocks:8
rdba:0x00402f48    file_id:1    block_id:12104     blocks:8
rdba:0x00403050    file_id:1    block_id:12368     blocks:8
rdba:0x00403180    file_id:1    block_id:12672     blocks:8
rdba:0x00403b38    file_id:1    block_id:15160     blocks:8
rdba:0x00404c48    file_id:1    block_id:19528     blocks:8
rdba:0x00404c78    file_id:1    block_id:19576     blocks:8
rdba:0x00404cf8    file_id:1    block_id:19704     blocks:8
rdba:0x00404da8    file_id:1    block_id:19880     blocks:8
rdba:0x00404db8    file_id:1    block_id:19896     blocks:8
rdba:0x00404de8    file_id:1    block_id:19944     blocks:8
rdba:0x00404e80    file_id:1    block_id:20096     blocks:128
rdba:0x00405900    file_id:1    block_id:22784     blocks:128
rdba:0x00406500    file_id:1    block_id:25856     blocks:128
rdba:0x00406980    file_id:1    block_id:27008     blocks:128
rdba:0x00407480    file_id:1    block_id:29824     blocks:128
rdba:0x00407500    file_id:1    block_id:29952     blocks:128
rdba:0x00407680    file_id:1    block_id:30336     blocks:128
rdba:0x00407800    file_id:1    block_id:30720     blocks:128
…………

新删除data guard归档日志shell脚本

以前写过删除dataguard归档日志的方法(删除data guard归档日志),但是以前的方法确实不够灵活也不够简便,现在提供最新的一次在客户现场部署的dg删除归档日志的shell脚本

#!/bin/sh
source ~/.bash_profile

grep "Media Recovery Log" $ORACLE_BASE/admin/$ORACLE_SID/bdump/alert_${ORACLE_SID}.log|
\ awk '{print $4}'|sed -e 's/^/rm /' >/tmp/rmarchlog.sh
chmod +x /tmp/rmarchlog.sh

/tmp/rmarchlog.sh
cd $ORACLE_BASE/admin/$ORACLE_SID/bdump

cat alert_${ORACLE_SID}.log >>alert_${ORACLE_SID}.log.bak
echo ''>alert_${ORACLE_SID}.log

rm -f /tmp/rmarchlog.sh


$ORACLE_HOME/bin/rman target / <<XIFENFEI
crosscheck archivelog all;
delete expired archivelog all;
YES
exit;
XIFENFEI

根据alert日志中dg应用日志的信息”Media Recovery Log”信息来删除掉相关的归档日志,可以保证应用过的归档日志都被删除,而没有应用的归档日志都保留.