Unix System Kernel: Instructor: S. Kiptoo Computer Science & IT Kimathi University College of Technology
Unix System Kernel: Instructor: S. Kiptoo Computer Science & IT Kimathi University College of Technology
Unix: Introduction
Operating System: a system that manages the resources of a computer. Resources: CPUs, memory, I/O devices, network. Kernel: the memory-resident portion of the Unix system. The file system and the process control system are the two major components of the Unix kernel.
cpp
cc as ld hardware
Libraries
File Subsystem
Process control
Device drivers
Hardware Level
File subsystem
A file system is a collection of files and directories on a disk or tape in standard UNIX file system format. The kernel's file subsystem regulates data flow between the kernel and secondary storage devices.
Hardware Control
Hardware control is responsible for handling interrupts and for communicating with the machine. Devices such as disks or terminals may interrupt the CPU while a process is executing. The kernel may resume execution of the interrupted process after servicing the interrupt.
Processes
A program is an executable file. A process is an instance of the program in execution. For example: create two active processes $ emacs & $ emacs & $ ps PID TTY TIME CMD 12893 pts/4 0:00 tcsh 12581 pts/4 0:01 emacs 12582 pts/4 0:01 emacs 9 $
Processes
A process has: text: machine instructions (may be shared by other processes); data; stack. A process may execute either in user mode or in kernel mode. Process information is stored in two places: - Process table - User table
10
11
Process Table
Process table: an entry in the process table has the following information: - process state: A. running in user mode or kernel mode B. ready in memory, or ready but swapped C. asleep in memory, or asleep and swapped - PID: process id - UID: user id - scheduling information - signals that have been sent to the process but not yet handled - a pointer to the per-process region table. There is a single process table for the entire system.
Process table
Active process
resident swappable
text
Region table
u area
data stack
14
executed by several users simultaneously. The text (program) part can be shared. In order to be shared, a program must be compiled using a special option that arranges the process image so that the variable part (data and stack) and the fixed part (text) are cleanly separated. An extension to the idea of sharing text is sharing libraries. Without shared libraries, all the executing programs contain their own copies.
Process table
text
Region table
data
stack Active process text data stack
Reference count = 2
16
System Call
A process accesses system resources through system calls. System calls for: - Process control: fork: create a new process; wait: allow a parent process to synchronize its execution with the exit of a child process; exec: invoke a new program; exit: terminate process execution. - File system: File: open, read, write, lseek, close; inode: chdir, chown, chmod, stat, fstat; others: pipe, dup, mount, umount, link, unlink
/*
 * forkEx1 -- minimal fork() demonstration.
 *
 * Prints one line before forking, then lets both resulting processes report
 * the value fork() returned to them: 0 in the child, the child's PID in the
 * parent. Both processes then run the final printf().
 *
 * Returns 0 on success, EXIT_FAILURE if the process could not be forked.
 */
#include <stdio.h>
#include <stdlib.h>
#include <sys/types.h>
#include <unistd.h>

int main(void)
{
    pid_t fpid;

    printf("Before forking ...\n");
    /*
     * Flush before forking: when stdout is not a terminal the line is still
     * sitting in the stdio buffer, and the child would inherit a copy and
     * print it a second time.
     */
    fflush(stdout);

    fpid = fork();
    if (fpid < 0) {
        /* -1 means no child was created; do not report it as "Parent". */
        perror("fork");
        return EXIT_FAILURE;
    }

    if (fpid == 0) {
        printf("Child Process fpid=%d\n", (int)fpid);
    } else {
        printf("Parent Process fpid=%d\n", (int)fpid);
    }

    /* Executed once by the child and once by the parent. */
    printf("After forking fpid=%d\n", (int)fpid);
    return 0;
}
$ cc forkEx1.c -o forkEx1 $ forkEx1 Before forking ... Child Process fpid=0 After forking fpid=0 Parent Process fpid=14707 After forking fpid=14707 $
19
/*
 * forkEx2.c -- show the process list before and after fork().
 *
 * Runs "ps" once before forking and once after. The second system("ps") is
 * executed by both the parent and the child, so two listings follow the fork.
 *
 * Returns 0 on success, EXIT_FAILURE if fork() fails.
 */
#include <stdio.h>
#include <stdlib.h>
#include <sys/types.h>
#include <unistd.h>

int main(void)
{
    pid_t fpid;

    printf("Before forking ...\n");
    /* Keep the line ahead of the ps output and out of the child's buffer. */
    fflush(stdout);
    (void)system("ps");

    fpid = fork();
    if (fpid < 0) {
        perror("fork");
        return EXIT_FAILURE;
    }

    /* Runs in both processes. */
    (void)system("ps");
    printf("After forking fpid=%d\n", (int)fpid);
    return 0;
}

/*
 * Process list in the shell before running the program:
 *
 *   $ ps
 *   PID TTY TIME CMD
 *   14759 pts/9 0:00 tcsh
 *   $
 */
$ forkEx2 Before forking ... PID TTY TIME CMD 14759 pts/9 0:00 tcsh 14778 pts/9 0:00 sh 14777 pts/9 0:00 forkEx2 PID TTY TIME CMD 14781 pts/9 0:00 sh 14759 pts/9 0:00 tcsh 14782 pts/9 0:00 sh 14780 pts/9 0:00 forkEx2 14777 pts/9 0:00 forkEx2 After forking fpid=14780 $ PID TTY TIME CMD 14781 pts/9 0:00 sh 14759 pts/9 0:00 tcsh 14780 pts/9 0:00 forkEx2 After forking fpid=0
21
/*
 * pid.c -- print this process's ID and its parent's ID.
 */
#include <stdio.h>
#include <sys/types.h>
#include <unistd.h>

int main(void)
{
    /* pid_t is not guaranteed to be int, so cast to match %d. */
    printf("pid=%d ppid=%d\n", (int)getpid(), (int)getppid());
    return 0;
}

/*
 * Sample session:
 *
 *   $ cc pid.c -o pid
 *   $ pid
 *   pid=14935 ppid=14759
 *   $
 */
22
/*
 * forkEx3.c -- fork() with PID/PPID reporting and no wait().
 *
 * After the fork, each process prints the fork() return value together with
 * its own PID and its parent's PID. The parent does not wait for the child,
 * so the parent may already have exited by the time the child calls
 * getppid().
 *
 * Returns 0 on success, EXIT_FAILURE if fork() fails.
 */
#include <stdio.h>
#include <stdlib.h>
#include <sys/types.h>
#include <unistd.h>

int main(void)
{
    pid_t fpid;

    printf("Before forking ...\n");
    /* Avoid the buffered line being duplicated into the child. */
    fflush(stdout);

    fpid = fork();
    if (fpid < 0) {
        perror("fork");
        return EXIT_FAILURE;
    }

    if (fpid == 0) {
        printf("Child Process fpid=%d pid=%d ppid=%d\n",
               (int)fpid, (int)getpid(), (int)getppid());
    } else {
        printf("Parent Process fpid=%d pid=%d ppid=%d\n",
               (int)fpid, (int)getpid(), (int)getppid());
    }

    /* Executed by both processes. */
    printf("After forking fpid=%d pid=%d ppid=%d\n",
           (int)fpid, (int)getpid(), (int)getppid());
    return 0;
}
$ cc forkEx3.c -o forkEx3 $ forkEx3 Before forking ... Parent Process fpid=14942 pid=14941 ppid=14759 After forking fpid=14942 pid=14941 ppid=14759 $ Child Process fpid=0 pid=14942 ppid=1 After forking fpid=0 pid=14942 ppid=1 $ ps PID TTY TIME CMD 14759 pts/9 0:00 tcsh
24
25
/*
 * fork() followed by wait(): the parent blocks until the child has exited
 * before printing its final line.
 *
 * Returns 0 on success, EXIT_FAILURE if fork() or wait() fails.
 */
#include <stdio.h>
#include <stdlib.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <unistd.h>

int main(void)
{
    pid_t fpid;
    int status;

    printf("Before forking ...\n");
    /* Avoid the buffered line being duplicated into the child. */
    fflush(stdout);

    fpid = fork();
    if (fpid < 0) {
        perror("fork");
        return EXIT_FAILURE;
    }

    if (fpid == 0) {
        printf("Child Process fpid=%d pid=%d ppid=%d\n",
               (int)fpid, (int)getpid(), (int)getppid());
    } else {
        printf("Parent Process fpid=%d pid=%d ppid=%d\n",
               (int)fpid, (int)getpid(), (int)getppid());

        /*
         * Only the parent has a child to wait for; in the child wait()
         * would just fail because it has no children.
         */
        if (wait(&status) < 0) {
            perror("wait");
            return EXIT_FAILURE;
        }
    }

    /* Child prints this first; the parent prints it after wait() returns. */
    printf("After forking fpid=%d pid=%d ppid=%d\n",
           (int)fpid, (int)getpid(), (int)getppid());
    return 0;
}
$ cc forkEx4.c -o forkEx4 $ forkEx4 Before forking ... Parent Process fpid=14980 pid=14979 ppid=14759 Child Process fpid=0 pid=14980 ppid=14979 After forking fpid=0 pid=14980 ppid=14979 After forking fpid=14980 pid=14979 ppid=14759 $
27
/*
 * execEx1.c -- replace the current process image with /bin/date.
 *
 * When execl() succeeds it does not return, so the final printf() is reached
 * only if the exec failed.
 */
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>

int main(void)
{
    printf("Before execing ...\n");
    /* A successful exec discards unflushed stdio buffers; flush first. */
    fflush(stdout);

    /*
     * The argument list must end with a null pointer of type char *.
     * A bare 0 is passed as an int through the variadic call, which is not
     * guaranteed to look like a null pointer to execl().
     */
    execl("/bin/date", "date", (char *)NULL);

    /* Only reached when execl() failed. */
    perror("execl");
    printf("After exec\n");
    return EXIT_FAILURE;
}
29
/*
 * execEx2.c -- fork, then exec /bin/date in the child.
 *
 * The child replaces itself with "date"; the parent continues and prints the
 * child's PID. The parent does not wait for the child.
 *
 * Returns 0 in the parent on success, EXIT_FAILURE if fork() fails.
 */
#include <stdio.h>
#include <stdlib.h>
#include <sys/types.h>
#include <unistd.h>

int main(void)
{
    pid_t fpid;

    printf("Before execing ...\n");
    /* Flush so the line is neither duplicated by fork nor lost by exec. */
    fflush(stdout);

    fpid = fork();
    if (fpid < 0) {
        perror("fork");
        return EXIT_FAILURE;
    }

    if (fpid == 0) {
        /* The argument list must be terminated by a char * null pointer. */
        execl("/bin/date", "date", (char *)NULL);

        /* Only reached if the exec failed; do not run the parent's code. */
        perror("execl");
        _exit(127);
    }

    printf("After exec and fpid=%d\n", (int)fpid);
    return 0;
}

/*
 * Sample session (date's output arrives after the shell prompt because the
 * parent does not wait for the child):
 *
 *   $ execEx2
 *   Before execing ...
 *   After exec and fpid=14903
 *   $ Sun May 9 16:47:08 CST 1999
 *   $
 */
Handling Signal
A signal is a message from one process to another. Signals are sometimes called software interrupts. Signals usually occur asynchronously. Signals can be sent A. by one process to another (or to itself) B. by the kernel to a process. Unix signals are content-free. That is, the only thing that can be said about a signal is whether it has arrived or not.
31
Handling Signal
Most signals have predefined meanings: A. SIGHUP (hangup): when a terminal is closed, the hangup signal is sent to every process attached to that controlling terminal. B. SIGINT (interrupt): politely asks a process to terminate. C. SIGQUIT (quit): asks a process to terminate and produce a core dump. D. SIGKILL (kill): forces a process to terminate. See sigEx1.c
32
/*
 * sigEx1 -- a parent waiting on a child that never exits by itself.
 *
 * The child spins in an endless loop, so the parent's wait() returns only
 * once the child has been terminated from outside (for example by a signal).
 *
 * Returns 0 in the parent on success, EXIT_FAILURE if fork() or wait() fails.
 */
#include <stdio.h>
#include <stdlib.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <unistd.h>

int main(void)
{
    pid_t fpid;
    int status;   /* storage for the child's termination status */

    printf("Before forking ...\n");
    /* Avoid the buffered line being duplicated into the child. */
    fflush(stdout);

    fpid = fork();
    if (fpid < 0) {
        perror("fork");
        return EXIT_FAILURE;
    }

    if (fpid == 0) {
        printf("Child Process fpid=%d pid=%d ppid=%d\n",
               (int)fpid, (int)getpid(), (int)getppid());
        /* Push the line out now: the child never exits normally. */
        fflush(stdout);
        for (;;) {
            /* loop forever */
        }
    } else {
        printf("Parent Process fpid=%d pid=%d ppid=%d\n",
               (int)fpid, (int)getpid(), (int)getppid());
    }

    /*
     * Wait for the child process. wait() must be handed the address of a
     * real int; an uninitialized int * would make it write through a wild
     * pointer.
     */
    if (wait(&status) < 0) {
        perror("wait");
        return EXIT_FAILURE;
    }

    printf("After forking fpid=%d pid=%d ppid=%d\n",
           (int)fpid, (int)getpid(), (int)getppid());
    return 0;
}
$ cc sigEx1.c -o sigEx1 $ sigEx1 & Before forking ... Parent Process fpid=14989 pid=14988 ppid=14759 Child Process fpid=0 pid=14989 ppid=14988 $ ps PID TTY TIME CMD 14988 pts/9 0:00 sigEx1 14759 pts/9 0:01 tcsh 14989 pts/9 0:09 sigEx1 $ kill -9 14989 $ ps ...
34
Scheduling Processes
On a time-sharing system, the kernel allocates the CPU to a process for a period of time (a time slice or time quantum), preempts the process and schedules another one when the time slice expires, and reschedules the process to continue execution at a later time. The scheduler uses a round-robin with multilevel feedback algorithm to choose which process to execute: A. The kernel allocates the CPU to a process for a time slice. B. It preempts a process that exceeds its time slice. C. It feeds the process back into one of several priority queues.
35
Process Priority
Priority Levels
swapper wait for Disk IO wait for buffer wait for inode ... wait for child exit User level 0 User level 1 ... User level n
36
Processes
Process A Process B Process C Priority CPU count Priority CPU count Priority CPU count
60 0 60 30 60 0 60 0
1
2
75
60
67
15
75
0 60 30
60
60
3
4
63
76
7 67 33
67
15
75
0 60 30
63
7 ...
67
15
38
Part 2
39
Booting
When the computer is powered on or rebooted, a short built-in program (possibly stored in ROM) reads the first block or two of the disk into memory. These blocks contain a loader program, which was placed on the disk when the disk was formatted. The loader is started. The loader searches the root directory for /unix or /root/unix and loads the file into memory. The kernel starts to execute.
40
init process
The init process is a process dispatcher: it spawns processes and allows users to log in. Init reads /etc/inittab and spawns getty. When a user logs in successfully, getty goes through a login procedure and execs a login shell. Init executes the wait system call, monitoring the death of its child processes and the death of processes orphaned by an exiting parent.
42
Init fork/execs a getty program to manage the line. When the shell dies, init wakes up and fork/execs a getty for the line. Getty prints the login: message and waits for someone to log in. The shell runs programs for the user until the user logs off.
The login process prints the password prompt, reads the password, then checks the password.
43
File Subsystem
A file system is a collection of files and directories on a disk or tape in standard UNIX file system format. Each UNIX file system contains four major parts: A. boot block: B. superblock: C. i-node table: D. data block: file storage
44
...
Block n Block n+1
Block 2 - n:i-nodes
...
The last Block
45
Boot Block
A boot block may contain several physical blocks. Note that a physical block contains 512 bytes (or 1 KB or 2 KB). A boot block contains a short loader program for booting. It is blank on other file systems.
46
Superblock
The superblock contains key information about a file system. Superblock information: A. Size of a file system and status: label: name of this file system; size: the number of logical blocks; date: the last modification date of the superblock. B. Information about i-nodes: the number of i-nodes; the number of free i-nodes. C. Information about data blocks: free data blocks. The information in a superblock is loaded into memory.
I-nodes
i-node: index node (information node) i-list: the list of i-nodes i-number: the index of i-list. The size of an i-node: 64 bytes. i-node 0 is reserved. i-node 1 is the root directory. i-node structure: next page
48
mode
owner timestamp
I-node structure
Data block Data block Data block Data block
Size
Reference count Block count Direct blocks 0-9 Single indirect Double indirect Triple indirect
...
Data block
...
Data block
Indirect block
Indirect block
...
I-node structure
mode: A. type: file, directory, pipe, symbolic link B. Access: read/write/execute (owner, group, others) owner: who owns this i-node (file, directory, ...) timestamp: creation, modification, access time size: the number of bytes block count: the number of data blocks direct blocks: pointers to the data blocks single indirect: pointer to a block that holds pointers to data blocks (128 data blocks). Double indirect: (128*128=16384 data blocks) Triple indirect: (128*128*128 data blocks)
Data Block
A data block has 512 bytes. A. Some file systems have 1K or 2K bytes per block. B. See the effect of block size (next page). A data block may contain the data of a file or the data of a directory. File: a stream of bytes. Directory format:
i-# Next size File name pad
51
home
alex
jenny
john
Report.txt grep
bin find
notes
i-#
Next
10
Report.txt
pad
i-#
Next
bin
pad
i-#
Next
notes
pad
Next
home
Boot Block
kc
alex
... ...
i-node
Report.txt grep source find notes
i-node
...
i-node
u area
Current directory inode
In-core inodes
i-node
...
i-node
source Report.txt
...
i-node
File table
The kernel has a global data structure, called the file table, to store information about file access. Each entry in the file table contains: A. a pointer to the in-core inode table B. the offset of the next read or write in the file C. access rights (r/w) allowed to the opening process. D. reference count.
56
57
58
/*
 * Open three files and print the descriptors returned by open().
 *
 * /etc/passwd is opened twice for reading and ./openEx1.c once for writing.
 * A descriptor value of -1 in the output means that open() failed.
 */
#include <fcntl.h>
#include <stdio.h>
#include <sys/types.h>
#include <unistd.h>

int main(void)
{
    int fd1, fd2, fd3;

    printf("Before open ...\n");
    fd1 = open("/etc/passwd", O_RDONLY);
    fd2 = open("./openEx1.c", O_WRONLY);
    fd3 = open("/etc/passwd", O_RDONLY);
    printf("fd1=%d fd2=%d fd3=%d \n", fd1, fd2, fd3);

    /* Release whatever was actually opened. */
    if (fd1 >= 0) {
        close(fd1);
    }
    if (fd2 >= 0) {
        close(fd2);
    }
    if (fd3 >= 0) {
        close(fd3);
    }
    return 0;
}
59
U area
file table
...
CNT=1 R ...
in-core inodes
CNT=2 /etc/passwd
CNT=1 W
... CNT=1 R ...
...
CNT=1 ./openEx2.c
. . .
...
60
61
/*
 * openEx2.c -- two consecutive read() calls on one descriptor.
 *
 * Both reads use fd1, so the second read continues where the first one
 * stopped (see the sample session below).
 *
 * Returns 0 on success, 1 if /etc/passwd cannot be opened.
 */
#include <fcntl.h>
#include <stdio.h>
#include <sys/types.h>
#include <unistd.h>

int main(void)
{
    char buf1[20], buf2[20];
    ssize_t got;
    int fd1;

    printf("=======\n");
    fd1 = open("/etc/passwd", O_RDONLY);
    if (fd1 < 0) {
        perror("open");
        return 1;
    }

    /*
     * Terminate each buffer at the number of bytes actually read, so a
     * short or failed read never prints uninitialized bytes.
     */
    got = read(fd1, buf1, sizeof buf1 - 1);
    buf1[got > 0 ? got : 0] = '\0';
    printf("fd1=%d buf1=%s \n", fd1, buf1);

    got = read(fd1, buf2, sizeof buf2 - 1);
    buf2[got > 0 ? got : 0] = '\0';
    printf("fd1=%d buf2=%s \n", fd1, buf2);

    printf("=======\n");
    close(fd1);
    return 0;
}

/*
 * Sample session (buf2 contains a newline read from the file):
 *
 *   $ cc openEx2.c -o openEx2
 *   $ openEx2
 *   =======
 *   fd1=3 buf1=root:x:0:1:Super-Us
 *   fd1=3 buf2=er:/:/sbin/sh
 *   daemo
 *   =======
 *   $
 */
/*
 * openEx3.c -- the same file opened twice.
 *
 * fd1 and fd2 come from two separate open() calls, and each read starts at
 * the beginning of the file (see the sample session below).
 *
 * Returns 0 on success, 1 if either open() fails.
 */
#include <fcntl.h>
#include <stdio.h>
#include <sys/types.h>
#include <unistd.h>

int main(void)
{
    char buf1[20], buf2[20];
    ssize_t got;
    int fd1, fd2;

    printf("======\n");
    fd1 = open("/etc/passwd", O_RDONLY);
    fd2 = open("/etc/passwd", O_RDONLY);
    if (fd1 < 0 || fd2 < 0) {
        perror("open");
        if (fd1 >= 0) {
            close(fd1);
        }
        if (fd2 >= 0) {
            close(fd2);
        }
        return 1;
    }

    /* Terminate at the byte count actually returned by read(). */
    got = read(fd1, buf1, sizeof buf1 - 1);
    buf1[got > 0 ? got : 0] = '\0';
    printf("fd1=%d buf1=%s \n", fd1, buf1);

    got = read(fd2, buf2, sizeof buf2 - 1);
    buf2[got > 0 ? got : 0] = '\0';
    printf("fd2=%d buf2=%s \n", fd2, buf2);

    printf("======\n");
    close(fd1);
    close(fd2);
    return 0;
}

/*
 * Sample session:
 *
 *   $ cc openEx3.c -o openEx3
 *   $ openEx3
 *   ======
 *   fd1=3 buf1=root:x:0:1:Super-Us
 *   fd2=4 buf2=root:x:0:1:Super-Us
 *   ======
 *   $
 */
U area
file table
...
CNT=1 R ...
in-core inodes
CNT=2 /etc/passwd
Descriptor table
...
... CNT=1 R ...
. . .
65
/*
 * openEx4.c -- a descriptor duplicated with dup().
 *
 * fd2 is a duplicate of fd1, and the read through fd2 continues where the
 * read through fd1 stopped (see the sample session below).
 *
 * Returns 0 on success, 1 if open() or dup() fails.
 */
#include <fcntl.h>
#include <stdio.h>
#include <sys/types.h>
#include <unistd.h>

int main(void)
{
    char buf1[20], buf2[20];
    ssize_t got;
    int fd1, fd2;

    printf("======\n");
    fd1 = open("/etc/passwd", O_RDONLY);
    if (fd1 < 0) {
        perror("open");
        return 1;
    }

    fd2 = dup(fd1);
    if (fd2 < 0) {
        perror("dup");
        close(fd1);
        return 1;
    }

    /* Terminate at the byte count actually returned by read(). */
    got = read(fd1, buf1, sizeof buf1 - 1);
    buf1[got > 0 ? got : 0] = '\0';
    printf("fd1=%d buf1=%s \n", fd1, buf1);

    got = read(fd2, buf2, sizeof buf2 - 1);
    buf2[got > 0 ? got : 0] = '\0';
    printf("fd2=%d buf2=%s \n", fd2, buf2);

    printf("======\n");
    close(fd2);
    close(fd1);
    return 0;
}

/*
 * Sample session (buf2 contains a newline read from the file):
 *
 *   $ cc openEx4.c -o openEx4
 *   $ openEx4
 *   ======
 *   fd1=3 buf1=root:x:0:1:Super-Us
 *   fd2=4 buf2=er:/:/sbin/sh
 *   daemo
 *   ======
 *   $
 */
U area
file table
...
CNT=2 R ...
in-core inodes
CNT=1 /etc/passwd
Descriptor table
...
... ... ...
. . .
68
69
70
/*
 * creatEx1.c -- create a file, write two strings to it, then chmod it.
 *
 * Returns 0 on success, 1 if the file cannot be created or a write fails.
 */
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <unistd.h>

int main(void)
{
    const char *buf1 = "I am a string\n";
    const char *buf2 = "second line\n";
    int fd1;

    printf("======\n");

    /*
     * The second argument of creat() is the permission mode of the new
     * file, not an open flag; creat() always opens for writing.
     */
    fd1 = creat("./testCreat.txt", 0666);
    if (fd1 < 0) {
        perror("creat");
        return 1;
    }

    /*
     * Write exactly the characters of each string. A fixed length larger
     * than the literal would read past its end.
     */
    if (write(fd1, buf1, strlen(buf1)) < 0 ||
        write(fd1, buf2, strlen(buf2)) < 0) {
        perror("write");
        close(fd1);
        return 1;
    }

    printf("fd1=%d buf1=%s \n", fd1, buf1);
    close(fd1);

    /* Set the final permissions explicitly; creat()'s mode is subject to the umask. */
    chmod("./testCreat.txt", 0666);
    printf("======\n");
    return 0;
}
71
staff
72
/*
 * statEx1.c -- print inode number, block size and block count of two files.
 *
 * Opens /etc/passwd and ./statEx1, calls fstat() on each descriptor and
 * prints st_ino, st_blksize and st_blocks.
 *
 * Returns 0 on success, 1 if an open() or fstat() call fails.
 */
#include <fcntl.h>
#include <stdio.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <unistd.h>

int main(void)
{
    struct stat bufStat1, bufStat2;
    int fd1, fd2;
    int rc = 1;

    printf("======\n");
    fd1 = open("/etc/passwd", O_RDONLY);
    fd2 = open("./statEx1", O_RDONLY);
    if (fd1 < 0 || fd2 < 0) {
        perror("open");
        goto out;
    }

    if (fstat(fd1, &bufStat1) < 0 || fstat(fd2, &bufStat2) < 0) {
        perror("fstat");
        goto out;
    }

    /*
     * ino_t, blksize_t and blkcnt_t are not necessarily int: cast to a
     * known width and use the matching conversion.
     */
    printf("fd1=%d inode no=%llu block size=%lld blocks=%lld\n",
           fd1, (unsigned long long)bufStat1.st_ino,
           (long long)bufStat1.st_blksize, (long long)bufStat1.st_blocks);
    printf("fd2=%d inode no=%llu block size=%lld blocks=%lld\n",
           fd2, (unsigned long long)bufStat2.st_ino,
           (long long)bufStat2.st_blksize, (long long)bufStat2.st_blocks);
    printf("======\n");
    rc = 0;

out:
    if (fd1 >= 0) {
        close(fd1);
    }
    if (fd2 >= 0) {
        close(fd2);
    }
    return rc;
}
$ cc statEx1.c -o statEx1 $ statEx1 ====== fd1=3 inode no=21954 block size=8192 blocks=6 fd2=4 inode no=190611 block size=8192 blocks= ====== ...
75
76
77
$ ls -l /usr/bin $
78
79