Young India 99

YES-Young Energetic Sincere

Home
Instant Inspiration
Incredible India
Knowledge Transfer(KT)
Placement Puraan
VTU Results
Technical Stuff
EAS(SMS + GPS)
Smart Parental Control
Cognitive Radio
Best LTE tutorials
Solaris Filesystem
Maglev trains
Solaris Crash Dump
Femtocells in India
Coders Adda
Contact Us
Solaris crash dump analysis using mdb and SCAT
 
 
 
Getting started
 
Check if savecore is enabled
 
bash-3.00# dumpadm
Dump content: kernel pages
Dump device: /dev/md/dsk/d10 (swap)
Savecore directory: /opt/crash/zcydsd705ha
Savecore enabled: yes
 
Bringing up Solaris from the OK prompt
 
OK>start /SYS
OK>start /SP/console
OK>boot
 
Running MDB/SCAT 
 
$> cd crash/
$> ls
unix.0 vmcore.0
 
MDB
$>mdb -k 0
 
SCAT
$>scat unix.0 vmcore.0
 
Disassembling the kernel driver object files with objdump to map crash addresses to source lines
 
objdump -d -M sparc -S my_kernel_driver.o > my_kernel_driver.dmp
 
Example crash dump analysis using MDB
 
bash-3.00# mdb -k 3
mdb: warning: dump is from SunOS 5.10 Generic_137111-08; dcmds and macros may not match kernel implementation
Loading modules: [ unix krtld genunix specfs dtrace ufs sd mpt pcisch md ip hook neti sctp arp usba fctl lofs zfs random ptm cpc fcip crypto ufs ipc ]

Panic stack
 
> $C
000002a100f88661 sum_data+0x6c(60028809000, 60043338680, 600272521f0, 0, 1000, 0)
000002a100f88741 ufs_sum_op+0x2ac(60028809000, 60043338680, 600272521f0, 0, 30002965880, 0)
000002a100f88821 ufs_defer_ops+0x2c0(60028809000, 600272419e0, 24, 3, 1, 600279470e8)
000002a100f88921 ufs_active_end+0x84(60028809000, 7b2b2a04, 0, 0, 600294bc4a0, 0)
000002a100f88a01 ufs_wait_link+0x5b0(60028809000, 4, 120, 1, 0, 0)
000002a100f88ae1 ufs_ioctl+0x9ec(ffffffffffffffff, 7824, 151024, 100001, 600275afc18, 2a100f89adc)
000002a100f890e1 fop_ioctl+0x20(60028809258, 7824, 151024, 100001, 600275afc18, 7b29eb30)
000002a100f89191 ioctl+0x184(2, 6002292ce80, 151024, 8004667e, 80046400, 7824)
000002a100f892e1 syscall_trap32+0xcc(2, 7824, 151024, 1765c8, ff101a00, 0)
 
Printing the stack frame structure for a given function (add the SPARC V9 stack bias, 0x7ff, to the frame pointer shown by $C)

> 000002a100f88661+0x7ff::print -t struct frame
{
    long [8] fr_local = [ 0xd000, 0x7b2cf000, 0x7b2cf800, 0, 0, 0xdbdf, 0x6002721c000, 0x7b2cf000 ]
    long [6] fr_arg = [ 0x60028809000, 0x60043338680, 0x600272521f0, 0, 0x1000, 0 ]
    struct frame *fr_savfp = 0x2a100f88741
    long fr_savpc = 0x7b2ba384
    long [6] fr_argd = [ 0x600275afc18, 0x1374634, 0x2a100f88731, 0x10f5d80, 0x2a100f88861, 0x1144944 ]
    long [1] fr_argx = [ 0x3 ]
}
 
More allocation information is collected if kmem_flags is set to 0x1 in /etc/system (add the line "set kmem_flags=0x1" and reboot)

> 0x60028809000::whatis
60028809000 is 60028809000+0, allocated from kmem_alloc_4096

Printing the address contents
 
> 0x60028809000,12/X
0x60028809000:  300             2965880         0               0               0               0
                0               0               0               10000           0               0
                0               0               2               0               3b6             3b6
    struct frame *fr_savfp = 0x2a100f89191
    long fr_savpc = 0x1107df0
    long [6] fr_argd = [ 0x2a100f891c1, 0x13776bc, 0xfedfbc80, 0x3, 0x2a100f89201, 0x113dd6c ]
    long [1] fr_argx = [ 0x1874000 ]
}

 
Printing the contents of a structure variable

> 60028809258::print -t vnode_t
{
    kmutex_t v_lock = {
        void *[1] _opaque = [ 0 ]
    }
    uint_t v_flag = 0x1
    uint_t v_count = 0xb
    void *v_data = 0x600288091d8
    struct vfs *v_vfsp = 0x60024246780
    struct stdata *v_stream = 0
    enum vtype v_type = 2 (VDIR)
    dev_t v_rdev = 0
    struct vfs *v_vfsmountedhere = 0
    struct vnodeops *v_op = 0x600275f5200
    struct page *v_pages = 0
    pgcnt_t v_npages = 0
    pgcnt_t v_msnpages = 0
    struct page *v_scanfront = 0
    struct page *v_scanback = 0
    struct filock *v_filocks = 0
    struct shrlocklist *v_shrlocks = 0
    krwlock_t v_nbllock = {
        void *[1] _opaque = [ 0 ]
    }
    kcondvar_t v_cv = {
        ushort_t _opaque = 0
    }
    void *v_locality = 0
    struct fem_head *v_femhead = 0
    char *v_path = 0x60027e55458 "/opt/data"
    uint_t v_rdcnt = 0
    uint_t v_wrcnt = 0
    u_longlong_t v_mmap_read = 0
    u_longlong_t v_mmap_write = 0
    void *v_mpssdata = 0
    hrtime_t v_scantime = 0
    ushort_t v_mset = 0
    uint_t v_msflags = 0
    struct vnode *v_msnext = 0
    struct vnode *v_msprev = 0
    krwlock_t v_mslock = {
        void *[1] _opaque = [ 0 ]
    }
}
 

> $C
000002a10668cab1 vpanic(109e658, 70009bb6c80, 0, 1, 0, 30042b6d4b0)
000002a10668cb61 page_vpsub+0x70(30042b6d4f8, 70009bb6c80, 109e400, 5a40, 1, 0)
000002a10668cc11 hat_page_setattr+0xc0(1881788, 1, 70009bb6c80, 1, 0, 30042b6d4b0)
000002a10668ccc1 segvn_faultpage+0x684(0, 30042b6d4b0, 3000000000, 18000, 10000, 0)
000002a10668cdf1 segvn_fault+0xc04(3000002000, 300363803f0, 2, 1096568, 0, 0)
000002a10668cfc1 as_fault+0x4c8(300363803f0, 300292a45a0, 3000000000, 30022932320, 189fbc0, 0)
000002a10668d0d1 pagefault+0x68(3000000000, 0, 2, 0, 3001c953208, 300229322a8)
000002a10668d191 trap+0xd50(2a10668db90, 10000, 0, 2, 10008aacc, 0)
000002a10668d2e1 utl0+0x4c(1002744d0, 1e, 3000000000, 3000000000, 10020d, 10020cfd0)
> 000002a10668cb61::threadlist
            ADDR             PROC              LWP CMD/LWPID
>
> 000002a10668cb61::whatis
mdb: traptrace not configured: no mapping for address
2a10668cb61 is in thread 3001d0cc560's stack
 
Getting details about the thread

> 3001d0cc560::thread -p
            ADDR             PROC              LWP             CRED
000003001d0cc560      3001c953208      300292a45a0      6003a0dd230
 
Getting details about the process

> 3001c953208::ps -t
S    PID   PPID   PGID    SID    UID      FLAGS             ADDR NAME
R  12225  12811  12225  12225    100 0x4a004000 000003001c953208 aWriter
        T     0x3001d0cc560 <TS_ONPROC>
 

> ::pgrep aWriter | ::walk thread | ::findstack
stack pointer for thread 3001d0cc560: 2a10668cab1
  000002a10668cb61 page_vpsub+0x70()
  000002a10668cc11 hat_page_setattr+0xc0()
  000002a10668ccc1 segvn_faultpage+0x684()
  000002a10668cdf1 segvn_fault+0xc04()
  000002a10668cfc1 as_fault+0x4c8()
  000002a10668d0d1 pagefault+0x68()
  000002a10668d191 trap+0xd50()
  000002a10668d2e1 utl0+0x4c()
stack pointer for thread 30019590b60: 2a104b4cf91
[ 000002a104b4cf91 cv_timedwait_sig+0x16c() ]
  000002a104b4d041 cv_waituntil_sig+0x8c()
  000002a104b4d111 poll_common+0x4e8()
  000002a104b4d201 pollsys+0xf8()
  000002a104b4d2e1 syscall_trap+0xac()
 
Files opened by the process
 

> 000003001c953208::pfiles

FD   TYPE            VNODE INFO

   0  CHR 00000600228a4b80 /devices/pseudo/mm@0:null

   1  CHR 00000600228a4b80 /devices/pseudo/mm@0:null

   2  CHR 00000600228a4b80 /devices/pseudo/mm@0:null

   3 DOOR 0000060030f37540 /var/run/name_service_door [door to 'nscd' (proc=60030fe90c0)]

   4 SOCK 0000030025ce9b80 socket: AF_INET 127.0.0.1 22518 remote: AF_INET 127.0.0.1 41061

   5 PROC 000003004151c9c0 /proc/12225/pagedata (proc=3001c953208)

   6  REG 000006003b87bd68

  /opt/config/applications/desktop/jws/CEM/helpsets/helpset/Core_BaseLogs_help/content/PM179_Format_3_98487.html

 

 

 

Printing panic info

 

> ::panicinfo
             cpu                1
          thread      30205934740
         message BAD TRAP: type=34 rp=2a106d250c0 addr=ffffbeefdeadbf27 mmu_fsr=0
          tstate       9900001604
              g1         7be01fe8
              g2         702f8018
              g3          111e3f4
              g4              6c0
              g5         88000000
              g6                0
              g7      30205934740
              o0 deadbeefdeadbeef
              o1               18
              o2              138
              o3                1
              o4                0
              o5                0
              o6      2a106d24961
              o7         7b26f4a8
              pc         7b299a58
             npc         7b299a5c
               y               20
            sfsr                0
            sfar ffffbeefdeadbf27
              tt               34

 

Printing the recent messages logged in the system

 

>::msgbuf

 

Finding who references the buffer

 

> 30081aa2b90::kgrep |::whatis

2a10b0518a0 is in thread 2a10b051ca0's stack

 

Getting thread stack

 

>2a10b051ca0::findstack -v

stack pointer for thread 2a10b051ca0: 2a10b050361

[ 000002a10b050361 panic_idle+0x14() ]

  000002a10b050411 ktl0+0x64(301e5baf5c0, 301768defb0, 0, 34, 2, 3002e0fa000)

 

 

 

Quitting mdb

 

>::$q

 

 

##################################################################################################################

 

Example crash dump analysis using SCAT

 

bash-3.00# scat 1

 

 

core file:      /tmp/vmcore.1

user:           Super-User (root:0)

release:        5.10 (64-bit)

version:        Generic_137111-08

machine:        sun4v

node name:      klabnbcnm0

hw_provider:    Sun_Microsystems

system type:    SUNW,Netra-CP3060 (UltraSPARC-T1)

hostid:         856fa774

dump_conflags:  0x10000 (DUMP_KERNEL) on /dev/md/dsk/d10(32G)

time of crash:  Mon Feb 21 22:44:27 IST 2011 (core is 21 days old)

age of system:  5 days 7 hours 38 minutes 17.15 seconds

panic CPU:      21 (32 CPUs, 15.8G memory)

panic string:   mutex_enter: bad mutex, lp=30024c3a000 owner=30003ec13e0 thread=2a10a015ca0

 

sanity checks: settings...

NOTE: /etc/system: module msgsys not loaded for 4 "set msgsys:..."

NOTE: /etc/system: module oopi not loaded for "set oopi:RFC1006_KeepAlive=0x1"

NOTE: /etc/system: module nfssrv not loaded for "set nfssrv:nfs_portmon=0x1"

vmem...CPU...sysent...clock...misc...

WARNING: init died 1.623775192 seconds earlier due to a SIGKILL

done

 

SolarisCAT(vmcore.1/10V)> analyze -l

 

==== panic thread: 0x2a10a015ca0 ==== CPU: 21 ====

==== panic kernel thread: 0x2a10a015ca0  PID: 0  on CPU: 21  affinity CPU: 21 ====

cmd: sched

t_procp: 0x1859390(proc_sched)

  p_as: 0x185a7b0(kas)

  zone: global

t_stk: 0x2a10a015a90  sp: 0x1875681  t_stkbase: 0x2a10a010000

t_pri: 60(SYS)  pctcpu: 0.000000

t_lwp: 0x0  psrset: 0  last CPU: 21

idle: 1160 ticks (11.60 seconds)

start: Sat Feb 19 01:21:02 2011

age: 249805 seconds (2 days 21 hours 23 minutes 25 seconds)

tstate: TS_ONPROC - thread is being run on a processor

tflg:   T_TALLOCSTK - thread structure allocated from stk

        T_PANIC - thread initiated a system panic

tpflg:  none set

unix:panicsys+0x48(0x109d8b0, 0x2a10a0156c8, 0x1876050, 0x1, , , 0x4414001602, , , , , , , , 0x109d8b0, 0x2a10a0156c8)

unix:vpanic_common+0x78(0x109d8b0, 0x2a10a0156c8, 0x0, 0x8000, 0x3428000, 0x0)

unix:panic+0x1c(0x109d8b0, 0x1833810, 0x30024c3a000, 0x30003ec13e0, 0x2a10a015ca0, 0x0)

unix:mutex_enter(0x30024c3a000) - frame recycled

ufs:ufs_rwlock+0xe0(0x3002a195798, 0x1, 0x0, 0x8000, 0x3428000, 0x0)

genunix:fop_rwlock+0x14(0x3002a195798)

genunix:vn_rdwr+0x170(0x1, 0x3002a195798, 0x3003ab96000, 0x8000, , 0x1, 0x0, , 0x60020805df0, 0x0)

fssnap:fssnap_write_taskq+0x10c(0x300413a5630, 0x600208be080, 0x7bebac90)

genunix:taskq_thread+0x1a4(0x6002286ad98, 0x0)

unix:thread_start+0x4()

-- end of kernel thread's stack --

 

 

SolarisCAT(vmcore.1/10V)> thread summary

        reference clock = panic_lbolt: 0x2bd2313, panic_hrtime: 0x1a1f6ccdc6948

   42   threads ran since 1 second before current tick (1 user, 41 kernel)

  131   threads ran since 1 minute before current tick (1 user, 130 kernel)

 

    0*  TS_RUN threads

    1   TS_STOPPED threads (0 user, 1 kernel)

  320   TS_FREE threads (0 user, 320 kernel)

    0   !TS_LOAD (swapped) threads

 

    0   threads trying to get a mutex

    0   threads trying to get an rwlock

  452   threads waiting for a condition variable (1 user, 451 kernel)

    2*  threads sleeping on a semaphore (0 user, 2 kernel)

          longest sleeping 8.16 seconds earlier

    0   threads sleeping on a user-level sobj

    0   threads sleeping on a shuttle (door)

 

    0   threads in biowait()

 

    0   threads in dispatch queues

 

  841   total threads in allthreads list (1 user, 840 kernel)

    5   thread_reapcnt

    0   lwp_reapcnt

  846   nthread

 

SolarisCAT(vmcore.1/10V)> tlist sobj sema

  thread        pri pctcpu           idle   PID         wchan command

  0x30003d233a0  60  2.928         -1.25s     3     0x18fdd00 fsflush

  0x2a101bddca0  60  0.000         -8.16s     0     0x18e0248 sched

 

   2 threads with that sobj found.

 

threads sorted by wchan:

count   wchan (semaphore)

      1 0x18fdd00

      1 0x18e0248

 

SolarisCAT(vmcore.1/10V)> sema -l 0x18e0248

count: 0  first sleeping thread: 0x2a101bddca0

  sleepq

  thread        pri pctcpu           idle   PID         wchan command

  0x2a101bddca0  60  0.000         -8.16s     0     0x18e0248 sched

SolarisCAT(vmcore.1/10V)> thread 0x2a101bddca0

==== kernel thread: 0x2a101bddca0  PID: 0 ====

cmd: sched

t_wchan: 0x18e0248  sobj: semaphore (from genunix:seg_pasync_thread+0x114)

t_procp: 0x1859390(proc_sched)

  p_as: 0x185a7b0(kas)

  zone: global

t_stk: 0x2a101bdda90  sp: 0x2a101bdd101  t_stkbase: 0x2a101bd8000

t_pri: 60(SYS)  pctcpu: 0.000000

t_lwp: 0x0  psrset: 0  last CPU: 0

idle: 816 ticks (8.16 seconds)

start: Wed Feb 16 15:07:40 2011

age: 459407 seconds (5 days 7 hours 36 minutes 47 seconds)

tstate: TS_SLEEP - awaiting an event

tflg:   T_TALLOCSTK - thread structure allocated from stk

tpflg:  none set

tsched: TS_LOAD - thread is in memory

        TS_DONT_SWAP - thread/LWP should not be swapped

pflag:  SSYS - system resident process

 

pc:      genunix:sema_p+0x130:   call   unix:swtch

startpc: genunix:seg_pasync_thread+0x0:   save  %sp, -0xe0, %sp

 

genunix:sema_p+0x130(0x18e0248)

genunix:seg_pasync_thread+0x114(0x0, 0x0, , , , 0x2a101bdda88)

unix:thread_start+0x4()

-- end of kernel thread's stack --

 

 

SolarisCAT(vmcore.23/10V)> stream -p 0x300630e4f50

STREAM : 0x300630e4f50
+-----------------------+-----------------------+
| 0x3008087a668         | 0x3008087a570         |
| strwhead              | strrhead              |
|                       |                       |
| bytes = 0x0           | bytes = 0x0           |
| flg = 0x4022          | flg = 0x44032         |
+-----------------------+-----------------------+
          |                       ^
          v                       |
+-----------------------+-----------------------+
| 0x30196717ba8         | 0x30196717ab0         |
| tcp                   | tcp                   |
|                       |                       |
| bytes = 0x0           | bytes = 0x0           |
| flg = 0x20244022      | flg = 0x20204032      |
+-----------------------+-----------------------+

SolarisCAT(vmcore.23/10V)> tlist findcall holdlwps
==== user (LWP_SYS) thread: 0x4a2d0695b60  PID: 11607 ====
cmd: /opt/ac

t_wchan: 0x4a1ae89d3b6  sobj: condition var (from genunix:holdlwps+0xbc)
t_procp: 0x4a1ae89d2f0
 p_as: 0x4a1cad885d0  size: 1310507008  RSS: 592371712
 hat: 0x39fc37ba280  cnum: CPU0:14147/11
   cpusran: 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31
 zone: global
t_stk: 0x2a109523ae0  sp: 0x2a109522e21  t_stkbase: 0x2a10951e000
t_pri: 59(TS)  t_tid: 123  pctcpu: 0.000000
t_lwp: 0x4a2c73648a0  machpcb: 0x2a109523ae0
 mstate: LMS_SLEEP  ms_prev: LMS_SYSTEM
 ms_state_start: 1 days 3 hours 50 minutes 23.797838672 seconds earlier
 ms_start: 8 days 1 hours 11 minutes 42.222801032 seconds earlier
psrset: 0  last CPU: 21
idle: 10022380 ticks (1 days 3 hours 50 minutes 23.80 seconds)
start: Tue Dec  6 06:09:15 2011
age: 695335 seconds (8 days 1 hours 8 minutes 55 seconds)
syscall: #62 fcntl(, 0xb137f130) (sysent: genunix:fcntl+0x0)
tstate: TS_SLEEP - awaiting an event
tflg:   T_DFLTSTK - stack is default size
tpflg:  TP_MSACCT - collect micro-state accounting information
tsched: TS_LOAD - thread is in memory
       TS_DONT_SWAP - thread/LWP should not be swapped
pflag:  SMSACCT - process is keeping micro-state accounting
       SMSFORK - child inherits micro-state accounting

pc:      genunix:cv_wait+0x38:   call   unix:swtch

genunix:cv_wait+0x38(0x4a1ae89d3b6, 0x3002cc65500, 0x31, 0x0, 0x100)
genunix:holdlwps+0xbc(0x800000)
genunix:closeandsetf+0x260(0x7, 0x4e6645dccd0)
genunix:fcntl+0x23c(, , 0x7?)
unix:syscall_trap32+0xcc()
-- switch to user thread's user stack --


  1 thread with that call found.

SolarisCAT(vmcore.23/10V)> proc 11607
   addr       PID    PPID   RUID/UID     size      RSS     swresv time  command
============= ====== ====== ========== ========== ======== ======== ====== =========
0x4a1ae89d2f0  11607  11412          0 1310507008 592371712 625278976 6983501 /opt/ac
       thread: 0x4a1da697940  state: slp   wchan: 0x4a44e166134  sobj: condition var (from ufs:ufs_op+0x690)
       thread: 0x4a2d0695b60  state: slp   wchan: 0x4a1ae89d3b6  sobj: condition var (from genunix:holdlwps+0xbc)

 

 

SolarisCAT(vmcore.23/10V)> tlist -s idle findcall ufs_delmap

 

SolarisCAT(vmcore.23/10V)> tlist -s idle sobj locks
  thread        pri pctcpu           idle   PID         wchan command
  0x4a1205b2680  60  3.161  1d3h41m12.59s 27865 0x30063246dc0 ./aWriter

SolarisCAT(vmcore.23/10V)> rwlock -L 0x30063246dc0
 possible read-lock owners:
  thread        pri pctcpu           idle   PID         wchan command
  0x39fc54c2700  60  0.001  1d3h41m44.31s  9782 0x30092c6e6b4 ./aWriter

 

 

 

Please leave your questions below.

 

* First name (required):

* Last name (required):
* E-mail address (required):

Phone number:
* Message (required):