Вы находитесь на странице: 1из 452

;-)

Linux

NOVELL PRESS


Ximian Desktop, Novell

www.williamspublishing.com

Novell


Linux

Linux Kernel
Development
Second Edition

Robert Love

Novell

Novell Press, 800 East 96th Street, Indianapolis,


Indiana, 46240 USA

Linux

-
2006

32.973.26-018.2.75
13
681.3.07
"c"
. ..
A.
"" :
info@williamspublishing.com, http://www.williamspublishing.com
115419, , / 783; 03150, , / 152
, .
13 Linux, 2- . : . . . : ".. "
2006. 448 . : . . . .
ISBN 5-8459-1085-4 (.)
Linux
2.6, ,
. :
, , ,
, , VFS,
, .
Linux.
, ,
.
,
,
.
32.973.26-018.2.7

.

, ,
,
Novell Press.
Authorized translation from the English language edition published by Novell Press, Copyright 2005
by Pearson Education, Inc.
All rights reserved. No part of this book shall be reproduced, stored in a retrieval system, or transmitted
by any means, electronic, mechanical, photocopying, recording, or otherwise, without written permission
from the publisher.
All terms mentioned in this book that are known to be trademarks or service marks have been appropriately
capitalized. Novell Press cannot attest to the accuracy of this information. Use of a term in this book
should not be regarded as affecting the validity of any trademark or service mark.
Russian language edition is published by Williams Publishing House according to the Agreement with
R&I Enterprises International, Copyright 2006
ISBN 5-8459-1085-4 (.)
ISBN 0-672-32720-1 (.)

"", 2006
2005 by Pearson Education, Inc., 200

15

17

21

22

1. Linux

23

2. Linux

33

3.

45

4.

65

5.

95

6.

109

7.

131

8.

163

9.

177

10.

207

11.

233

12.

265

13. -

293

14.

311

15.

331

16.

343

17. kobject sysfs

355

18.

373

19.

389

20. ,

405

415

423

429

433

437

15
17

...


-

18
18
18
19
20

21
22

22

1. Linux

23

: Linux

Linux Unix
Linux
Linux

25
26
29
31
32
32

2. Linux

33









" "
l i b c
GNU




33
33
34
34
34
37
37
38
38
39
39
41
42
42
42
43
43

3.

45

task structure

46







Linux


.

""

47
48
50
51
51
52
53
54
57
59
59
61
61
63

4.

65


, -



.













,


67
67
68
69
70
70
71
72
74
75
78
81
83
87
88
88
89
91
91
92
92
93

5.

95

API, POSIX
syscall




96
97
98
99
99
99
100

6.














, !

7.



(softirq)



ksoftirqd








101
101
104
104
106
107
108

109
109
111
111
112
114
115
116
117
119
121
124
125
126
127
128

131
132
133
133
136
136
139
141
141
144
146
148
149
150
154
157
157
159
160
162

8.

163

164
164
167
169
170
172
174
176

9.



-
-
-
- -



-
-

BLK:



10.

: HZ
HZ
j i f f i e s
j iffies
jiffies
HZ


.


177

177
178
181
183
186
187
188
190
193
193
195
196
196
197
199
200
201
205

207
208
209
210
213
214
215
217
218
218
218
219
221
223
224
226





s c h e d u l e _ t i m e o u t ()

11.




kmalloc ()
gfp_mask
kf ()
vmalloc ()









,
percpu
, ,
, ,
,

12.


Unix
VFS
VFS
superblock

inode

dentry

dentry

10

226
227
227
229
230
232

233
233
235
238
238
239
240
241
245
246
248
249
252
254
256
257
257
257
258
259
260
260
261
263
264

265
266
266
267
269
270
270
272
274
276
278
280
280
281

file

,
,
Linux

13. -


b i o


.

-
-

-
-
-
-
noop
-

14.



mm_struct

VMA
VMA



find_vma ()
find_vma_prev ()
find_vma_intersection ()
mmap () do_mmap ( ) :
mmap
()
munmap () do_munmap ():
munmap ()

15.

address_space

283
284
288
289
291

293
294
295
298
300
301
301
302
302
303
304
307
308
309
309
310

311
313
315
315
316
316
317
319
320
321
322
323
324
324
325
326
327
327
327
329

331
332
332
11


-

pdflush
bdflush kupdated
:

16.
"Hello,Worldl"









17. kobject sysfs


k o b j e c t
ktype
k s e t


k o b j e c t

kref
sysfs
sysfs
sysfs

kobj e c t sysfs

18.


p r i n t k ()
p r i n t k ()


s y s l o g d klogd
p r i n t k ()
Oops
ksymoops
12

335
336
336
337
339
339
341

343
343
345
345
347
347
347
348
349
351
353
354

355
356
357
358
358
359
360
361
362
363
365
366
369
371

373
373
374
375
375
376
377
377
378
378
380

kallsyms



SysRq

gdb
kgdb
kdb

UID




19.
Linux




c h a r





big-endian little-endian

20. ,

t y p e d e f

380
381
381
382
382
384
384
385
385
385
385
386
386
387
388
388

389

'

390
391
394
394
395
396
396
397
397
398
399
401
401
401
402
403
403
404

405
405
406
406
406
407
408
408
408
410
13

,
i f d e f





.


Linux



-

,

.

Unix
Linux

API Unix

Web-

14

'

410
410
411
411
411
412
412
413
414

415
416
416
417
417
419
421

423
424
426
426
427

429
429
430
430
431
431

433
433
434
434
435
435
435
436

437

(Doris) (Helen)

, Linux , ,
Linux. , , Linux,
, .
:
. , , . , , -.
, Linux , ,
. , Linux, , ,
.

: , , " ,
" .. (Linus
Torvalds).
, , , ,
. ( . , ,
.)
, .
.

(Robert Love) , ,
.
: , , , , ..
, , , ,
. , : . ,
.

. ,
. .
! ,
, . , ,
Linux, , ,
.
(Andrew Morton)
Open Source Development Labs

16


Linux
, , , .
. ,
, - ,
. ? , , .
, . .
.
. ,
. ,
.
, Linux. , . , .
, .
, , , ( ),
. ,
, . ,
.
, .
. : , . . , ,
Linux 1
2.7. 2.6.
, , , , 2.6,
. ,
, " " .
, . , .
, , , .

1
Linux (Linux Kernel Development
Summit), 2004 . , .

17

...

, Unix-.
,
. , , ,
, .
, . Linux ,
. .
.
! ! ! ,
.


Linux 2.6 2.6.10.
" ", .
,
, .


, Linux.
.
(API)
(, API Linux ).
,
. ,
.
, ,
, , . ,
, , . , ,

Linux, .
,
, . , . 7, "
", (bottom half).

18


( ), , bottom half ( ).
,
. , , ,
, ,
. ,
.
, . ,
(API). ,
.
, ,
. ,
. ,
,
.
, , ,
, , .
,
,
, ,
.
, Linux. . ,
,
.
, , . ,
, . ;
.

-
- h t t p : //tech9.net/rml/kernel_book/, , , , . .

19


, , (
, ), , ,
.
,
, .
,
(Scott Meyers) ,
.
(Georg Nedeff), , . (Margo Catts). ,
, .

(Adam Belay), (Martin Pool) (Chris Rivera).
. ,
, , .
(Zak Brown), .
,
, ,
. (Andrea Arcangeli),
(Alan Cox), - (Greg Kroah-Hartman),
(Daniel Phillips), (David Miller), (Patrick Mochel),
(Andrew Morton), (Zwane Mwaikambo),
(Nick Piggin) (Linus Torvalds). ( ).
.
(Paul Amici), (Keith Barbag), (Dave Eggers),
(Richard Erickson), (Nat Friedman),
(Dustin Hall), (Joyce Hawkins), (Miguel de Icaza),
(Jimmy Krehl), (Doris Love), (Jonathan Love),
(Patrick LeClair), (Linda Love), '
(Randy O'Dowd), (Salvatore Ribaudo)
, (Chris Rivera), (Joey Shaw),
(Jeremy VanDoren) , (Steve Weisberg)
(Helen Whinsnant).
, .
!
,
. , , .

20


(Robert Love) Linux
. GNOME.
Ximian Desktop Novell.
Vista Software.
, , , () (preemptive kernel),
, (VM),
.
schedutils GNOME.
Linux Journal.

. , , . , .

21


, , . , ,
.
, .
. Web-
. ,
, , ,
.
, , .
.
:
E-mail:
WWW:

info@williamspublishing.com
http://www.williamspublishing.com

:
:
:

115419, , / 783
03150, , / 152


Sams
Publishing - www.novellpress.com.
ISBN ( ) .

22

Linux

()
Unix
. Unix 1969 , (Dennis Ritchie)
(Ken Thompson) , ,
.
Unix Multics , Bell
Laboratories. Multics, Bell Laboratories
Computer Sciences Research Center . 1969
Bell Labs , Unix.
PDP-7. 1971
Unix PDP-11, 1973 ,
, . Unix, Bell Labs, Unix
System 6, V6.
Unix .
, , ,
,
1977 Bell Labs
Unix System III, 1982 AT&T System V1.
Unix, ,
, ,
.
(University of California at Berkeley).
1

System IV? , o .

Unix Berkeley Software


Distributions (BSD). Unix,
1981 , 3BSD. 4BSD:
4.0BSD, 4.1BSD, 4.2BSD 4.3BSD. Unix
, (demand paging)
TCP/IP. Unix
4.4BSD, 1993 ,
. BSD
Darwin, Dragonfly BSD, FreeBSD, NetBSD OpenBSD.
1980-1990- , , Unix. AT&T ,
. Tru64
Digital, HP-UX Hewlett Packard, AIX IBM, DYNIX/ptx
Sequent, IRIX SGI, Solaris Sun.
Unix , , Unix , .
Unix . -,
Unix :
, Unix- . -, Unix 2.
,
: open(), read(), write(), ioctl() close(). -, Unix
- Unix .
Unix f o r k ( ) . , Unix ,
, , , , ,
.
Unix , , , , , , ,
TCP/IP. Unix , Unix
. Unix
,
( )
2

, , , . , Plan9 ( Unix), .

24

Unix,
.
Unix . ,
,
Unix .

: Linux
Linux (Linus
Torvalds) 1991 ,
Intel 80386.
Unix- .
DOS, Microsoft,
, " ", . Minix, Unix- , .

( Minix), , Minix.
, . ,
Unix- . , . , Unix-. 1991
.
, Linux
. Linux , ,
, .
, Linux ,
.
Linux ,
AMD x86-64, ARM, Compaq Alpha, CRIS, DEC VAX, H8/300, Hitachi
SuperH, HP PA-RISC, IBM S/390, Intel IA-64, MIPS, Motorola 68000, PowerPC, SPARC,
UltraSPARC v850. , ,
- . Linux . ,
Linux (MontaVista Red Hat), (IBM, Novell) , .
Linux Unix, Linux
Unix. Linux Unix, Linux
API Unix ( POSIX Single Unix
Specification), Linux
Unix, Unix-, , ,
, ,

Linux

25

Unix .
Linux ,
; , , . ,
Linux , .
Linux. Linux,
,
3.

, Linux GNU General Public License


(GPL) 2.0. .
,
, 4.
Linux
. ,
, , , , (login) (shell). Linux
X Windows, - (desktop
environment), , , GNOME. Linux . Linux, ,
Linux. , ,
, Linux .
, Linux .


- ,
. , ,
, . , , , . , (boot loader),
, .
, .
, .
, .
, 3

, ,
http://www.fsf.org http://www.opensource.org.
4

, GNU GPL, . COPYING,


, .
http://www.fsf.org.

26

. , ,
.
(core) . , , ,
, , , ,
.
.
. , , , ,
( , kernel-space). ,
(
, , user-space). ,
, . () ,
, .
, , (system call) (. 1.1).
, , , ,
, .
,
, . p r i n t f ().

w r i t e () .
. , open () ,
open () . , , , s t r c p y (),
, .
, , . , ,
, . ,
, .
.
, , Linux, (interrupt). - , ,
5.
5

, ,
. - *. .

Linux

27

. 1.1. , . .

.
(interrupt handler), . ,
, ,
, .
, . ,
, , . : ,
. .
(interrupt context), . ,
.

. , , Linux
.
.

28

,
.

Linux
Unix
API, Unix
.
Unix . ,
,
. Unix (memory management unit);

.
Unix.

, ,
: . ( , , , ,
.)
, 1980- . , ,
.
. , , , .
, .
.
.
, . , ,
. . , ,
. , , , . .
(Inter Process Communication, IPC)
, ""
IPC. .
, .
IPC , ,
,
, .

Linux

29

,
, ,
, , .
Windows NT, Mach ( Mac OS X) - . Windows NT,
Mac OS X ,
.
Linux , .. ,
. Linux
: ,
( ).
Linux ,
: ,
. , Linux , , .
.

Linux,
, Linux ,
Unix (, , API Unix).
Linux - Unix,
! Linux, Unix.
Linux .
Linux , .
Linux (SMP).
Unix SMP, Unix
.
Linux .
Unix, Linux ,
.
Unix Solaris IRIX.
Linux (threads): .
,
.
Linux Unix,
, , , STREAMS,
"" .
Linux .
, Linux, Linux. - ,

30

. ,
Linux "" :
,
. , Unix, ,
. , Linux Unix.

Linux
Linux -: (stable) (development). - , .

. , ,
.
, .
Linux
(. 1.2.). , , . - (major) , - (minor), -
(, revision). ,
; , ,
, . , , 2.6.0 . 2, 6 0.
" ", 2.6.
6
( )

2.6.0
. 1.2.

. , .
,
.

. .
, .
. (
) , , . ,
2.5 2.6.

Linux

31


- .
. , .
2004 Linux 2.6 Linux 2.7. , 2.6
; , , ,
.
, , , , , .
, , 2.6
. ,
.

2.6.

Linux
Linux, Linux.
Linux (linux-kernel mailing list). http://vger.kernel.org.
, ( 300 ) ( , ) . ;
, .

, .


Linux: ,
.
, .
,
.

, .
, Linux
. , Linux, "" , , . , Linux,
. ,
. !
!

32

2

Linux

, Linux:
,
. , Linux, , , .
, ,
, . .
.



tar (tarball),
http://www.kernel.org.

, .
kernel.org , ,
.


tar
GNU zip (gzip) bzip2. bzip2 , gzip.
bzip2 linux-x.y.z.tar.bz2 , x, y, z .
. tar- GNU zip, .
$ tar xvzf linux-x.y.z.tar.gz

bzip2,
.
$ tar xvjf linux-x.y.z.tar.bz2


linux-x.y.z.

/usr/src/linux. , . , , . ,
, root,
root
. /usr/src/linux
.


Linux (patch) .
. (incremental
patch), . ,
.
.
, ,
.
$ patch -p1 < ../patch-x.y.z

.
.


,
. ,
, . 2.1.
, ,
. COPYING (GNU GPL v2).
CREDITS ,
. MAINTAINERS , . , Makefile .


. ,
, ,
, g l i b c . 2.6 ,
2.4.
34

2 . 1 .

arch

crypto

API

Documentation

drivers

fs

VFS

include

init

ipc

kernel

lib

mm

net

scripts

security

Linux

sound

usr

(initramfs)

Linux, , ,
. . , . ,
, , .
CONFIG_FEATURE. , (Symmetric multiprocessing, SMP) CONFIG_SMP. ,
SMP . , SMP . .config
, , make xconfig.
, , .
: (boolean) (tristate).
yes . , CONFIG_PREEMPT,
. yes, no module. module , ,
(.. , ). .

Linux

35

, , .
, , . ,
.
, Linux ,
Novell Redhat, . .
. , , ,
, .
,
.
:

make config

,
yes, no module { ).
, , ncurses:
make menuconfig

X11:
make xconfig

, gtk+
make gconfig

,
Processor Features ( ) Network Devices ( ).
, ,
.
$ make defconfig

, ,
. ( , i386 ), ,
. ,
, ,
.

.config. ,
, .
. 36


, :
make oldconfig
, .
, :
make
, 2.6
make dep , . , bzImage, . , Makefile, ,
!


, ,
, ,
, make (1):
make > "__"
,
. ,
, .

make > /dev/null,
.


make (1) .
, . ,
- (, -).
make (1) ,
. "
", . ,
. .
$ make -jn
n , .

Linux

37

. , .
$ make -j4
, distcc(1) ccache(1), .


, . .
,
, . , , !
, x86, grub
arch/i386/boot/bzImage
/boot /etc/grub/grub.conf ,
. , LILO, /etc/lilo.conf lilo(8).
. root.
$ make modules_install

System.map. .
.

" "
,
, .
.
, .
( , , ),
. .
.
GNU .
, .
.
.
38

, SMP,
.
.
,
.

l i b c
,
( ). ,
,
. ,
, .
, . , lib/string.c.
<linux/string.h> .

, , , .
,
,

printf() .
printf() , printk(). printk() (kernel log buffer), syslog .
printf() :
printk("Hello world! : %s : %d\n", a_string, an_integer);
p r i n t f () p r i n t k () ,
p r i n t k () . s y s l o g , , .
:
printk(KERN_ERR " !\n");
p r i n t k () . p r i n t k ().

GNU
" " Unix, Linux .
, , Linux
ANSI . , , -

Linux

39

, gcc
(GNU Compiler Collection GNU,
, ).
ISO C991 GNU . Linux gcc, , Intel , gcc ,
Linux. - 99,
, 99 ,
.
, , ANSI
GNU . , .


GNU (inline functions).
, , , .
( ) ,

. ( ) , .
,
. , , .
s t a t i c
i n l i n e . ,
static inline void dog(unsigned long tail_size);

, .
.
( s t a t i c ) , .
,
.
.

ISO C99 ISO . 99 .


ISO C99 complex.

40

Ill


gcc . , ,
, .

asm().
Linux . ,
. .


gnu ,
, .
.
likely() unlikely() ,
. , if :
if (foo) {
/*..*/
}
, ,
:
/* , foo ..*/
if (unlikely(foo)) {
/*..*/
}
,
/* , foo ..*/
if (likely(foo)) {
/*..*/
}
, - . ,
,
, . u n l i k e l y () l i k e l y ()
.



,
. ,
.
oops, Linux

41

. ,
, NULL ,
!
, .
, , . , .



,

. ,
, .
, , .

, . , : ; .


"" ,
. ,

(, , , , DOS, , ).
, , , , .
. 86 4 8 . , , 8
32- 16 64-.
. .
.


(race condition). , ,
.
, .

42

Linux . , , ,
.
.
, ,
,
.
Linux . , ,
, .

( ) - .

.



, Linux . , - , ,
.
,
, 64- ,

.
.

, : , , ,
. Linux .
, ; , ,
. .
, , ,
, . , , , .
, , , , ,
, .
.

Linux

43

- Unix- 1. , , .. ,
- . , Unix text section (
). (data section), ; ,
;
. .
,
(thread), , . (program
counter), . , . Unix- .
. , Linux .
Linux .

: .
, , , , , . 4,
" ", . , . 11, " ".
, .

, ; .
, , . ,
, ,
. , . Linux
fork() (, ), . , fork (), (, parent), (, child).

,
. - . exec*()
. Linux fork() clone(),
.
e x i t ( ) .
.
wait4() 2 ,
. ,
(zombie), , wait()
waitpid().
(task). Linux . ,
,
.

task structure
, task list3 ( ). struct task_struct,
include/linux/sched.h.
.

w a i t 4 ( ) . Linux
w a i t ( ) , w a i t p i d ( ) , w a i t 3 ( ) w a i t 4 ( ) .
, .
3

t a s k a r r a y
( ). Linux , ,
task l i s t .
i

46

t a s k _ s t r u c t 1,7 32- . ,
, ,
. , , , , , (. 3.1).


t a s k _ s t r u c t , (slab allocator), -

(cache coloring)
(. 11, " "). 2.6 t a s k _ s t r u c t
. , (, ,
86), , (stack pointer),
. ,
thread_info, ( ,
)
( , )4 (. 3.2.).
struct task_struct
struct task_struct
struct task_struct
struct task_struct
unsigned long state;
int prio;

unsigned long policy;


struct task_struct *parent;
struct list_head tasks;

pid_t pid;

(task list)
. .1.

thread_info , , ,
.

47

current_thread_info()

struct thread_info

thread_info

struct task_struct
3.2.

struct thread_info 86 <asm/


thread_info.h> .
struct thread_info {
        struct task_struct    *task;
        struct exec_domain    *exec_domain;
        unsigned long         flags;
        unsigned long         status;
        __u32                 cpu;
        __s32                 preempt_count;
        mm_segment_t          addr_limit;
        struct restart_block  restart_block;
        unsigned long         previous_esp;
        __u8                  supervisor_stack[0];
};

thread_info
. thread_info t a s k
task_struct .


,
(process identification, PID). PID

, pid_t5 , int.
5

(opaque type) ,
.

48

, Unix Linux 32768


( short int). pid .
, , . 32768 ,
. , ,
: ,
. , //
sys/kernel/pid_max.

t a s k _ s t r u c t . , , , t a s k _ s t r u c t . ,
, , current.
. t a s k _ s t r u c t , , , . ,
, ,
, thread_info . thread_info, task_struct
.
x86 current
13 thread_info. current_thread_info ().
.
movl $-8192, %eax
andl %esp, %eax

c u r r e n t
task thread_info:
current_thread_info()->task;


PowerPC ( RISC- IBM),
c u r r e n t r2.
, ,
8, .
,
.

49


s t a t e
(. 3-3).
.


fork()

TASK_ZOMBIE
( )


: schedule ()
context_switch ()


do_exit()

TASK_RUNNING

(,
)

TASK_RUNNING

(]

TASK_INTERRUPTIBLE
TASK_UNINTERRUPTIBLE
( )

. 3.3.

,
.
TASK_RUNNING (runnable). ,
, , ( , runqueue, 4. " ").
TASK_INTERRUPTIBLE (
, sleeping), ..

50

. ,
TASK_RUNNING. (wake up) .
TASK_UNINTERRUPTIBLE - TASK_INTERRUPTIBLE, , .
,
, .
, TASK_UNINTERRUPTIBLE , TASK_INTERRUPTIBLE6.
TASK_ZOMBIE ,
w a i t 4 ( ) . ,
. wait4 (),
.
TASK_STOPPED .
. , - SIGSTOP, SIGTSTP, SIGTTIN SIGTTOU,
, .


.

set_task_state(task, state);
/* set task 'task' to state 'state' */
. ,
(memory barrier), ( SMP-). :
task->state = state;
set_current_state(state) set_task_state(current, state).



. (executable) . . (. 5,
" ") , .
6

- "" ,
ps (1) , D, , SIGKILL. , ,
, , - .

51

, " "
. current 1.
, . ,
.

.
.


Linux . i n i t , PID 1. i n i t
. i n i t , , (initscripts) , .
.
, .
, , (siblings). . t a s k _ s t r u c t
t a s k _ s t r u c t , p a r e n t ,
,
children. , (current),
:
struct task_struct *task = current->parent;

,
, :
struct task_struct *task;
struct list_head *list;
list_for_each(list, &current->children) {
        task = list_entry(list, struct task_struct, sibling);
        /* task now points to one of current's children */
}

init init_task.
, .
1

, 6, " ". ,
. ,
.

52

struct task_struct *task;

for (task = current; task != &init_task; task = task->parent)
        ;
/* task now points to init */
, ,
. , ,
. , . , - , :
list_entry(task->tasks.next, struct task_struct, tasks)
.
list_entry (task->tasks.prev, struct task_struct, tasks)
next_task (task)
( ), p r e v _ t a s k ( t a s k ) ( ).
, for_each_process (task)
. t a s k :
struct task_struct *task;
for_each_process(task) {
/* PID
*/
printk("%s[%d]\n", task->comm, task->pid);
}
, ,
, .
(
).


Unix .
(spawn).
, ,
. Unix ,
: fork () exec ( ) 8 .

8
e x e c ( ) e x e c * ( ) .
e x e c v e ( ) , e x e c l p ( ) , execle(),
execv()
execvp().

53

f o r k ( ) ,
. PID ( ), PPID ( PID ,
PID ), , ( ),
- exec ()
.
f o r k () e x e c ()
, .


f o r k ( )
.
. Linux fork () (copy-on-write) .
(copy-on-write, COW)
.
.
,
, , . , ,
,
(read-only). ,
. ,
, , , exec () fork (),
. ,
f o r k ( ) ,
. (
10 ), .
, Unix
.

f o r k ()
Linux f o r k ()
c l o n e () .
, , (
) . "
Linux" .
f o r k ( ) , v f o r k ( )
c l o n e d c l o n e ()
. c l o n e ()
do_fork ( ) .

54

do_f ork (),


kernel/fork.c. , , copy_process () . , copy_process ().
dup_task_struct (), , thread_info task_struct ,

. .
,
.
.
. .
TASK_
UNINTERRUPTIBLE, ,
.
copy_process () copy_flags (), flags task_struct.
PF_SUPERPRIV, , . PF_FORKNOEXEC, , exec (), .
get_pid () , PID .
, clone (), , , ,
(namespace). . .
( 4,
" ").
,
.
do_fork () .
copy_process () , . 9.

9
, ,
, .

55

,
exec () , , ,
, .

v f o r k ()

vfork () ,
fork (), , . ,
exec () .
. 3BSD,
fork ()
. ,
,
vfork ()
. - Linux
10,
. vfork () (, , , exec () ?),
, vfork () . vfork ()
fork (), Linux 2.2.

vfork () clone (), .

copy_process () vfork_done
t a s k _ s t r u c t NULL.

do_fork (), , vfork_done ( ).

,
, copy_process () , , vfork_done.

mm_release () (
, ), vfork_done NULL, .

do_fork() .
10

Linux. , , 2.6
, .

56

, , ,
. , .

Linux
. .
.
(concurrent programming),
.

Linux . Linux
. Linux
, . Linux
-
. , .
t a s k _ s t r u c t (
, , ).
Linux ,
Microsoft Windows Sun Solaris, ( , lightweight process). " " Linux
. ,
, ,
. Linux
( ) 11 .
, , .
,
. , . , . Linux,
, , ,
t a s k _ s t r u c t . , .
, , ,
c l o n e () , :
clone(CLONE_VM | CLONE_FS | CLONE_FILES | CLONE_SIGHAND, 0);
11

( )
Linux .
.

57

,
f o r k (), , , , . , , , . , f o r k ()
:
clone (SIGCHLD, 0 ) ;
v f o r k () :
clone(CLONE_VFORK | CLONE_VM | SIGCHLD, 0);

, c l o n e () , ,
. . 3.1
c l o n e () .
3 . 1 . c l o n e ()

CLONE_FILES

CLONE_FS

CLONE_IDLETASK

PID (
(idle) )

CLONE_NEWNS
CLONE_PARENT

CLONE_PTRACE
CLONE_SETTID
CLONE_SETTLS


TID ?
(thread local storage, TLS)

CLONE_SIGHAND

CLONE_SYSVSEM


SEM_UNDO System V

CLONE_THREAD

CLONE_VFORK

v f o r k ():
,

CLONE_UNTRACED


CLONE_PTRACE

CLONE_STOP
CLONE_CHILD_CLEARTID
CLONE_CHILD_SETTID
CLONE_PARENT_SETTID
CLONE_VM

T A S K _ S T O P P E D

58

TID
TID
TID


.
(kernel thread)
, .

, ( mm NULL).
, .
,
.
Linux ,
, pdflush ksoftirqd. .
, .
:
int kernel_thread(int (*fn)(void *), void *arg, unsigned long flags)
c l o n e () , f l a g s .
t a s k _ s t r u c t .
, fn,
arg. CLONE_KERNEL,
CLONE_FS, CLONE_FILES CLONE_SIGHAND, f l a g s .

(, , ,
Linux- ). ,
,
.
.


, .
, , ,
, , ,
, , "".
, e x i t ()
( e x i t ()
main ()). . , , -

59

. , , do_exit(),
.
PF_EXITING flags task s t r u c t .
del_timer_sync (), .
,
.
,
(BSD process accounting), acct_process ()
, .
__exit_mm() mm_struct,
. ( , ), .
exit_sem (). IPC,
.
__exit_files (), __exit_fs () , exit_namespace ()
e x i t _ s i g n a l s () , , ,
. , ,
.
, exit_code
task_struct. exit() , - .
exit_notify(), (reparent) ,
- , i n i t . TASK_ZOMBIE.
schedule () (. 4, " ").
TASK_ZOMBIE , , .
do_exit () k e r n e l / e x i t . .
, ( ). (,
, ), , TASK_ZOMBIE. ,
, , thread_inf task_struct.

60

, .


do_exit ()
, TASK_ZOMBIE . ,
. , .
, task_struct .
wait () (
) wait4 (). , . PID . ,
,
.
, release_task (), .
free_uid () . Linux ,
. ,
,
.
unhash_process () - pidhash .
(ptrace),

, (ptrace)
.
p u t _ t a s k _ s t r u c t ()
, thread_inf, a
, task_struct.
, , , .

""
, , -
, , ,
, .
:

61

-
, init. do_exit()
notify_parent(), forget_original_parent()
(reparent),
.
struct task_struct *p, *reaper = father;
struct list_head *list;
if (father->exit_signal != -1)
reaper = prev_thread(reaper);
else
reaper = child_reaper;
if (reaper == father)
reaper = child_reaper;
r e a p e r
. , r e a p e r c h i l d _ r e a p e r ,
i n i t . , ,
, .
list_for_each(list, &father->children) {
        p = list_entry(list, struct task_struct, sibling);
        reparent_thread(p, reaper, child_reaper);
}
list_for_each(list, &father->ptrace_children) {
        p = list_entry(list, struct task_struct, ptrace_list);
        reparent_thread(p, reaper, child_reaper);
}
: child list , ptraced child list. ,
, ( 2.6). ptrace,
, (debugging).
,
. .
, ,
, , :

.
,
, - .
62

I n i t wait () , , -, .

.
, ,
. , , Linux , ( t a s k _ s t r u c t t h r e a d _ i n f ) , (
clone () fork ()), ( exec ()), ,
(
wait ()) (
e x i t ()).
, , , , ( ).
, ,
, - .

63


, .
, .
(scheduler) , , . , (, , )
, ,
. (multitasking) , Linux. ,
, , .
, , . , , , -
. , , . (runnable). ,
, ,
, .
, .
, .
.
, . , , .
, , (
, ,
..). , Linux 100 ,
.

(multitasking) : (cooperative) (preemptive,

) . Linux, Unix ,
. , , , . , , (preemption)
. , ,
, . (timeslice)
. , . . , ,
.
, Linux
, .
,
,
. ,
,
(yielding). : ,
; ,
; "" ,
, . , , , . Mac OS 9
. , Unix
.
Linux 2.5,
. O(1)-
(O(1) scheduler)
1 .
Linux , .
,
(1)-, 0(1)-, , , .

(1) " ". , ,


, . , " ",
, " ".

66


(policy) , , .
. , .

,
-
, - (I/O-bound), , (processor-bound). ,
-
. , ,
,
- (
-, - ,
, , ).
, , , .
, , -.
-, , . , , ,
, . ,
. , , .
. : X Windows ,

-.
-, . , , ,
.
:
( , low latency)
(throughput).
, , ,
, . Unix- , ,
-, .
, -, -

67

,
-. Linux ( ), .. ,
-, .
, , ,
.



(priority-based). , . , ,

( , round-robin), .. .
, Linux,
. , , , . ,
.
Linux (dynamic priority-based), .
, ,
. , ,
, -,
-. Linux
. ,
, , , .
.
Linux .
nice, -20 19,
0. nice
(ic . , ).
nice ( )
nice ( ).
nice , . nice -20 ,
nice 19 . nice Unix .

68


(real-time priority), . 0 99.
.
Linux POSIX. Unix- .


(timeslice2) , ,
, . , , .
, .
,
. , , - . , -,
, , , , , .
,
.
, ,
, 20 . Linux
, .
Linux ,
. Linux ( 4.1). , Linux
. , ,
.
.
,
. , ,
100 , 100 ,
. 20 .
2
timeslice ( ) quantum () processor slice. Linux timeslice.

69


100

800

. 4.1.

, , , ,
.
, ,
. , , , .
, ,
. . Linux , . .


, Linux . TASK_RUNNING, . ,
, , ,
( , ). ,
,
.


:
.
-,
( ,
, ). , ,
. .
,
,
, 100%. -

70

: , . , , .
, ,
. . , ,
. , .
, ,
.
.



Linux. , , , Linux.
Linux
k e r n e l / s c h e d . c .
,
2.5. , . , .
(1) -. ,
, ,
.
SMP-.

.
SMP- (SMP affinity). ,
,
, , ,
. .

.
.
(fairness). .
, .

71

, 1-2,

, .


(runqueue).
kernel/sched.c 3
s t r u c t runqueue. .
.
. , , . ,
. , .
.
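struct runqueue {
        spinlock_t          lock;                 /* spin lock that protects this runqueue */
        unsigned long       nr_running;           /* number of runnable tasks */
        unsigned long       nr_switches;          /* context switch count */
        unsigned long       expired_timestamp;    /* time of last array swap */
        unsigned long       nr_uninterruptible;   /* number of uninterruptible tasks */
        unsigned long long  timestamp_last_tick;  /* last scheduler tick */
        struct task_struct  *curr;                /* currently running task */
        struct task_struct  *idle;                /* this processor's idle task */
        struct mm_struct    *prev_mm;             /* mm_struct of the last-run task */
        struct prio_array   *active;              /* active priority array */
        struct prio_array   *expired;             /* expired priority array */
        struct prio_array   arrays[2];            /* the actual priority arrays */
        struct task_struct  *migration_thread;    /* migration thread */
        struct list_head    migration_queue;      /* migration queue */
        atomic_t            nr_iowait;            /* number of tasks waiting on I/O */
};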

: kernel/sched.,
include/linux/sched.h?
, .

72

, ,
. cpu_rq (processor) , , .
this_rq () , . , task_rq(task) ,
.
,
( 8, " "). , (, , ).

, , , .
, , . task_rq_lock ()
task_rq_unlock(), .
struct runqueue *rq;
unsigned long flags;
rq = task_rq_lock(task, &flags);
/* */
task_rq_unlock (rq, &flags);

this_rq_lock (), , rq_unlock (struct


runqueue *rq), .
, ,
, ( 8,
" ", ).
, , .
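/* to avoid deadlock, always lock the runqueues in the same (address) order ... */
if (rq1 < rq2) {
        spin_lock(&rq1->lock);
        spin_lock(&rq2->lock);
} else {
        spin_lock(&rq2->lock);
        spin_lock(&rq1->lock);
}

/* manipulate both runqueues ... */
/* then release both locks ... */
spin_unlock(&rq1->lock);
spin_unlock(&rq2->lock);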

73

double_rq_lock () double_rq_unlock ()
. .
double_rq_lock(rq1, rq2);
/* ...*/
double_rq_unlock(rq1, rq2);
, , . 8, "
" 9, " ".
:
. -
. , . , , ,
.
, ( ), ,
. (, spinning),
, , . , , , , , . ,
,
.
, .
, . .
,
, ,
, .
,
. 8 9 .


(priority arrays): . k e r n e l / s c h e d . c
s t r u c t p r i o _ a r r a y . , 0(1)-. , . (priority bitmap),
,
.
struct prio_array {
        int               nr_active;            /* number of tasks in the queues */
        unsigned long     bitmap[BITMAP_SIZE];  /* priority bitmap */
        struct list_head  queue[MAX_PRIO];      /* priority queues */
};
74

MAX_PRIO . 140. , s t r u c t l i s t _ h e a d . BITMAP_SIZE


, unsigned long.
. 140 32- , BITMAP_SIZE 5. , bitmap
, 160 .
b i t m a p , .
0. ( TASK_RUNNING),
b i t m a p 1.
, , 7, , 7.

. , ,
, , . , Linux (find first set) .
s c h e d _ f i n d _ f i r s t _ b i t ( ) .
4.
.
,
s t r u c t l i s t _ h e a d . queue.
.
, , ,
. , ,
. .
n r _ a c t i v e , .


( Linux)
, .

86 bsfl, cntlzw.

75

, ,
.
for (each task on the system) {
        recalculate priority
        recalculate timeslice
}


. .
. ,
(n), n .
-
.
.

.
, (
- Linux).
Linux .
: (active) (expired). ,
, .
, . -
, , , .

.
, ,
. schedule ().
struct prio_array *array = rq->active;
if (!array->nr_active) {
        rq->active = rq->expired;
        rq->expired = array;
}

, O(1)-.

, (1)- . .

76

schedule ()

schedule ().
(sleep), a
- . schedule () . , , .
schedule () , , . .
struct task_struct *prev, *next;
struct list_head *queue;
struct prio_array *array;
int idx;
prev = current;
array = rq->active;
idx = sched_find_first_bit(array->bitmap);
queue = array->queue + idx;
next = list_entry(queue->next, struct task_struct, run_list);


. .
, . , .
. 4.2.
schedule()
sched_find_first_set()
0 0

7 7





140

139 139


,
7

. 4.2. (1)- Linux


77

prev next ,
(next). , prev, , next, context_switch (),
. .
. , , , . -,
.
. , schedule ()
. .


, , , .
, , -
, , . ,
.
, nice.
-20 19, 0. 19 , -20
. nice s t a t i c _ p r i o t a s k _ s t r u c t . ,
,
. , prio. .
effective_prio () . ic -5 5,
. , ,
nice, 10, ,
5. , nice, 10,
, ,
12. , ,
, , nice.
, , . ,
, - . (sleep).
, -.
, 78

, . ,
,
-;
, .
Linux , , , ,
. sleep_avg
t a s k _ s t r u c t .
MAX_SLEEP_AVG, 10 . , sleep_avg
,
, sleep_avg MAX_SLEEP_AVG.
, (timer tick) , 0.
, , . , ,
, . , ,

, : ,
, .
. ,
, , . , .

sleep_avg. ,
nice, nice .
, , , nice ( ).

. ,
.

. , , . task_timeslice ()
.
. ,

. MAX_TIMESLICE, 200 . MIN_TIMESLICE, 10 .

79

, ( nice,
), 100 , . 4.1.
4.1.




nice
,

+19
0
100

-20

5 (MIN_TIMESLICE)
(DEF_TIMESLICE)
800 (MAX_TIMESLICE)

:
,
,
. , :
, . ,
O(1). , ,
, ,
"" . . , ,
, .
s c h e d u l e r _ t i c k (), ( 10, " "), .
struct task_struct *task = current;
struct runqueue *rq = this_rq();
if (!--task->time_slice) {
if (!TASK_INTERACTIVE(task) || EXPIRED_STARVING(rq))
enqueue_task(task, rq->expired);
else
enqueue_task(task, rq->active);
}
,
. ,
. TASK_INTERACTIVE ().
nice , "
". nice ( ), . nice, 19,
-

80

. , nice, -20,
, . nice, , .. ,
, ,
. , EXPIRED_STARVING ( ) , , ,
(starving), .
,
, , . , . , .



( ,
, sleeping, blocked) , . ,
, "" , , ,
, . ,
.
,
- .
,
( 9,
" ").
-,
r e a d () , .
.
: , (wait queue),
schedule()
. (wake up) :
,
.
,
: TASK_INTERRUPTIBLE TASK_UNINTERRUPTIBLE.
, TASK_UNINTERRUPTIBLE , TASK_INTERRUPTIBLE .
, , , .

(wait queue). ,

81

. wait_queue_head_t. DECLARE_WAIT_QUEUE_HEAD () init_waitqueue_head ().


. , , , , . ,
(race).
,
. : , .
.
.
/* 'q' is the wait queue we wish to sleep on */
DECLARE_WAITQUEUE(wait, current);
add_wait_queue(q, &wait);
set_current_state(TASK_INTERRUPTIBLE); /* or TASK_UNINTERRUPTIBLE */
/* 'condition' is the event we are waiting for */
while (!condition)
        schedule();
set_current_state(TASK_RUNNING);
remove_wait_queue(q, &wait);
, ,
.
DECLARE_WAITQUEUE ( ) .
add_wait_queue () .
, , , . , -
, wake_up () , .
TASK_INTERRUPTIBLE TASK_
UNINTERRUPTIBLE.
, . ,
.
, s c h e d u l e ().
, . ,

82

. , schedule ()
.
, TASK_RUNNING
remove_wait_queue().

,
,
. ,
. , schedule () ; ,
-ERESTARTSYS; .
(wake up) wake_up (),
, , . try_to_wake_up () ,
TASK_RUNNING,
activate_task () need_resched , ,
, . , , wakeup ()
, . , , VFS wake_up ()
, , .
, . , , , , :
, , ,
, (. 4.3).


, Linux
. ,

. , , . - ?
, , ,
, ? , ,
. .

83

add_wait_queue() ,
TASK_INTERRUPTIBLE
schedule(). schedule()
deactivate_task(), .

,

TASK_RUNNING

, , ,
try_to_wake_up() TASK_RUNNING,
activate_task() schedule() .
remove_wait_queue () .
. 4,3. (sleeping) (wake up)

, , . , ,
, .
k e r n e l / s c h e d .
l o a d _ b a l a n c e (). .
s c h e d u l e (), . 1 , , 200
. l o a d _ b a l a n c e () ,
, .
, , . , load b a l a n c e ( ) s c h e d u l e ( ) ,
,

. , .
, , . 4.4.

84

load_balancer()

20

4
5
6

15


1,
20


2,
15

. 4.4.

load_balance ()
, , , .
load_balance () find_busiest_queue ()
.
. , 25% , ,
f ind_busiest_queue () NULL
load_balance ().
.
load_balance () , .
,
, ,
(.. , not "cache hot").
, , .
load_balance () ,
( ), , .
, , -
. ,
, p u l l _ t a s k ()
.
,
.
, , load_balance ().

85

load_balance (), ,
.
static int load_balance(int this_cpu, runqueue_t *this_rq,
struct sched_doraain *sd, enum idle_type idle)
{
struct sched_group *group;
runqueue_t *busiest;
unsigned long imbalance;
int nr_moved;
spin_lock(&this_rq->lock);
group = find_busiest_group(sd, this_cpu, &imbalance, idle);
if (!group)
goto out_balanced;
busiest = find_busiest_queue(group) ;
if (!busiest)
goto out_balanced;
nr_moved = 0;
if (busiest->nr_running > 1) {
double_lock_balance(this_rq, busiest);
nr_moved = move_tasks(this_rq, this_cpu, busiest,
imbalance, sd, idle);
spin_unlock(&busiest->lock);
}
spin_unlock(&this rq->lock);
if (!nr_moved) {
sd->nr_balance_failed++;
if (unlikely(sd->nr_balance_failed > sd->cache_nice_tries+2)) {
int wake = 0;
spin_lock(&busiest->lock);
if (!busiest->active_balance) {
busiest->active_balance = 1;
busiest->push_cpu = this_cpu;
wake = 1;
}
spin_unlock(&busiest->lock);
if (wake)
wake_up_process(busiest->migration_thread);
sd->nr_balance_failed = sd->cache_nice_tries;
}
} else
sd->nr_balance_failed = 0;
sd->balance_interval = sd->min_interval;
return nr_moved;

86

out_balanced:
spin_unlock (&this_rq->lock) ;
if (sd->balance_interval < sd->max_interval)
sd->balance_interval *= 2;
return 0;
}


, .
context_switch(), kernel/sched.. schedule (), .
.
switch_mm (), include/asm/
mmu_context.h
.
s w i t c h _ t o () , i n c l u d e /asm/
system.h,
. .
, schedule ().
,
, .
need_resched , ,
schedule () (. 4.2). scheduler_tick (), ,
try_to_wake_up (), ,
, .
, , schedule ()
. ,
,
.
4.2. n e e d _ r e s c h e d

need_resched

set_tsk_need_resched (task)

clear_tsk_need_resched (task) need_resched


need_resched()

need_resched
. t r u e ,
, f a l s e ,

87

, need_resched . ,
, .
, , (-
current ,
). ,
2.2. 2.2 2.4 t a s k _ s t r u c t
i n t . 2.6 t h r e a d info. ,
.


(user preemption) , ,
need_resched , , . , "" . ,
,
.
, need_resched. , ,
. ,

entry.S ( ,
, ). ,
.
.
.


Linux,
Unix, (, preemptible). . , ,
- , .
, ( )
. 2.6, Linux : , ,
,
.
? , , -

88

. ,
, . (SMP-safe), ,
.
, ,
p r e e m p t _ c o u n t t h r e a d _ i n f
.
, . . , , n e e d _ r e s c h e d
p r e e m p t _ c o u n t . n e e d _ r e s c h e d
preempt__count , ,
. . p r e e m p t _ c o u n t , ,
. . , ,
preempt_count . , , , n e e d _ r e s c h e d . , . ,
9.
, s c h e d u l e () .
, , , . ,
s c h e d u l e (), , .
.
.
.
, , s c h e d u l e ( ) .
, , , .. ( s c h e d u l e ()).


Linux (real-time): SCHED_FIFO SCHED_RR.
SCHED_OTHER , .. . SCHED_FIFO
" " (first-in
first-out, FIFO) .
SCHED_FIFO
SCHED_OTHER.

89

SCHED_FIFO , , .
,
SCHED_FIFO, (roundrobin). , SCHED_FIFO, , , .
SCHED_RR SCHED_FIFO, ,
, . , SCHED_RR
SCHED_FIFO , .. (round-robin)
. SCHED_RR, . , .
SCHED_FIFO,
, SCHED_RR, .
. . , ,
, .
Linux (soft real-time).
,
,
.
(hard real-time)

. Linux . Linux
, ,
. Linux , , Linux .
2.6 .
1 MAX_RT_PRIO 1,
MAX_RT_PRIO 100, 1 99.
nice
SCHED_OTHER, MAX_RT_PRIO (MAX_RT_PRIO+40).
, nice -20 +19 100 139.

90

Linux
. , , ,
(yield) .
, (man pages), ( ,
). . 4.3
. , , 5, " ".
4.3.

nice ()
sched_setscheduler ()

nice

sched_getscheduler ()

sched_setparam ()
sched_getparam ()

sched_get_priority_max ()
sched_get_priority_min ()
sched_rr_get_interval ()

sched_setaffinity()
sched_getaffinity ()
sched_yield ()

,

sched_setscheduler () sched_getscheduler ()
. ,
,
, .
p o l i c y r t _ p r i o r i t y
t a s k _ s t r u c t .
sched_setparam () sched_getparam () .
r t _ p r i o r i t y , sched_param. sched_get_priority_max ()

91

sched_get_priority_min () .
(MAX_USER_RT_PRIO-1), - 1.
nice () .
root , ..
nice . nice ()
s e t _ u s e r _ n i c e (), s t a t i c _ p r i a
prio task_struct.


Linux
(processor affinity). ,
: " ".
cpus_allowed t a s k _ s t r u c t .
. 1,
.
sched_setaffinity ()
. sched_getaffinity () cpus_allowed.

. -, .
, . -, , (migration threads) . ,
,
cpus_allowed .


Linux sched_yield ()
,
.
( ,
) .
, ,
, ,
.
,
. ( ).

. Linux s c h e d _ y i e l d () .
.

,
, s c h e d _ y i e l d ().
, , y i e l d () , , TASK_RUNNING,
s c h e d _ y i e l d ( ) . s c h e d _ y i e l d ().


, ( , ) . , ,
. , ,
, , , . , Linux
,
, .
, , (
) ,
, , ,
.
. - .
NUMA (
) , NUMA- . (scheduler domain) , ;
2.6 .
,
Linux. ,
, .

93

, , , . .
,
, ( ). ,
, , , , , .
, .
. -,
. ,
, , , , . -, .
, . ,
, - ,
. ,
,
3, " ".
, . Linux ,
; . , /,
.

, Linux 1
, .
Linux.

API, POSIX
, (Application Programming Interface, API). ,
,
, , .
API , . , ,
. , , API .

Unix- POSIX. POSIX
I2, ,
Unix. Linux POSIX.
POSIX
API . Unix- API, POSIX, . , POSIX ,
, Unix,
. , , OS
Unix, Windows NT, , POSIX.
Linux,
Unix-, .
Unix-,
. , , , ,
, .
1

x86 250 (
). ,
.
2

IEEE, eye-trple-E ( , Institute of


Electrical and Electronics Engineers) , ,
POSIX. : h t t p : / / w w n . i e e e . o r g .

96

p r i n t f ()

->

p r i n t f ( )

->

w r i t e ( )

w r i t e ( )

>

>

. 5.1. ,
p r i n t f ( )

API POSIX.
, :
, , API. , : ,
, . - , .
Unix " ,
". , , . , , .

syscall
( syscall Linux)
.
(inputs), 3 ,
,
. long 4 , . , ,
, .
( ) .
Unix e r r n o . y p e r r o r ().
, , . , g e t p i d () , , PID .
.

"". (.. - ), , , , getpid (),


.
4

long 64- .

97

/*
 * sys_getpid - kernel-side implementation of the getpid() system call.
 *
 * Takes no arguments and returns current->tgid, the thread group ID of
 * the calling task.
 */
asmlinkage long sys_getpid(void)
{
	return current->tgid;
}
, .
, , , . , , ,
( )5.
, . -,
asmlinkage . ,
.
. -, , g e t p i d () , s y s _ g e t p i d ().
Linux: b a r () sys_bar ( ) .


Linux
(syscall number).
.
, .
.
, . ,
. Linux " " ("not implemented")
s y s _ n i _ s y s c a l l (), , ,
, -ENOSYS, , . " " , .

. , s y s _ c a l l _ t a b l e .
e n t r y . S .
s y s c a l l .

, , g e t p i d () tgid, (thread group ID)? ,


TGID PID. TGID . getpid () PID.

98


Linux ,
.
.
.
, .


.
, ,
. ,
" ".
-
, ,
, .
, ,
: (exception) .
(system call handler). 8 i n t $0x80.
128, .
s y s t e m _ c a l l ().
e n t r y . S 6 . , sysenter.
, i n t . .
, , , ,
, .


, ,
. .

x86. ,
.

99

86
, .
.
.
system_call() NR_syscalls. NR_syscalls, -ENOSYS.
:
call *sys_call_table(,%eax,4)

32 (4 ),
4 (. 5.2).


read()

read()


system_call()


sys_read()

read ()

sys_read()

. 5.2.


, .
-
.
:
. 86 ebx, ecx, edx, e s i , edi .
, ,
, .
. 86 .

100


Linux
.
Linux .
. .
, , Linux.
,
.. . . ( ,
, ,
) Linux
. , ,
i o c t l ( ) .
, ? , . , ,
.
. ? . , , .
, .
?
. 19, "", . ,
. Unix:
" , ".
, , .
Unix .
, !


,
, . , , .
, - , . , , , PID .
,
.

101

,
. , , , , !
, , , . ,
, .
. ,
.
. ,
.
. . ,
.

. ,
!
.
copy_to_user ().
: ; ; , , .
copy_from_user (),
c o p y _ t o _ u s e r (). ,
, , , .
, . .
-EFAULT.
,
copy_from_user () c o p y _ t o _ u s e r () . s i l l y _ c o p y ()
. . , .
.
/*
* silly copy ,
* len ,
* src, ,
* dst,
* . !
*/
asmlinkage long sys_silly_copy(unsigned long *src,
unsigned long *dst, unsigned long len)
{

102

unsigned long buf;


/* ,
, */
if (len != sizeof(buf))
return -EINVAL;
/* src,
, buf */
if (copy_from_user (&buf, src, len))
return -EFAULT;
/* buf dst,
*/
if (copy_to_user (dst, &buf, len) )
return -EFAULT;
/* */
return len;
}
, , copy_from_user () c o p y _ t o _ u s e r ( ) , . , , , , ,
.
, - (page fault handler)
.
.
Linux s u s e r () , root. , root.
" " (capabilities). . c a p a b l e ()
, , , , .
, c a p a b l e (CAP_SYS_NICE) , nice .
, , root, .
, , .
/*
 * sys_am_i_popular - report whether the caller holds CAP_SYS_NICE.
 *
 * Returns 0 when capable(CAP_SYS_NICE) succeeds and -EPERM otherwise.
 */
asmlinkage long sys_am_i_popular (void)
{
/* check whether the caller possesses the CAP_SYS_NICE capability */
if (!capable(CAP_SYS_NICE))
return -EPERM;
/* the capability check passed: return zero for success */
return 0;
}

" " , , <linux/capability.h>.


103


3, " ", . current
, , .
(,
schedule ()), .
. ,
. 6, " ",
7. , , , , ,
. , , . , . , ,
8, " ", 9,
" ".

system_call (),
, .


,
.
. ,
( ).
, . , .

include/linux/unistd.h.
( 8).
- kernel/.
7

, ,
,
.
8

. ,
, . . .

104


, f (). sys_f () . e n t r y . S .
ENTRY(sys_call_table)
.long sys_restart_syscall
.long sys_exit
.long sys_fork
.long sys_read
.long sys_write
.long sys_open
/*
...
.long sys_timer_delete
.long sys_clock_settime
.long sys_clock_gettime
.long sys_clock_getres
.long sys_clock_nanosleep

/* 0 */

*/

/* 280 */

:
.long sys_foo
, 283,
. ,
, (
). .
, ,
.

include/asm/unistd.h, .
/*
* This file contains the system call numbers.
*/
#define __NR_restart_syscall 0
#define __NR_exit 1
#define __NR_fork 2
#define __NR_read 3
#define __NR_write 4
#define __NR_open 5
...
#define __NR_mq_unlink 278
#define __NR_mq_timedsend 279
#define __NR_mq_timedreceive 280
#define __NR_mq_notify 281
#define __NR_mq_getsetattr 282

105

.
#define __NR_foo 283
f (). ,
k e r n e l / s y s . . . ,
, s c h e d . .
/*
 * sys_foo - trivial example system call.
 *
 * Takes no arguments and returns the constant THREAD_SIZE.
 * NOTE(review): THREAD_SIZE is presumably the per-process kernel stack
 * size (the user-space example stores the result in `stack_size`) - confirm.
 */
asmlinkage long sys_foo(void)
{
return THREAD_SIZE;
}
! . foo ().



.

( ,
).
, , g l i b c !
, Linux - .
i n t $0x80. _ s y s c a l l n ( ) ,
. , , ,
, ,
. , open (),
.
long open(const char *filename, int flags, int mode)
.
#define __NR_open 5
_syscall3(long, open, const char *, filename, int, flags, int, mode)
open ( ) .
2 + 2*n .
. . -

106

, . NR_open,
<asm/unistd.h>, . , .
, ,
.
, open ().
,
, , .
#define __NR_foo 283
_syscall0(long, foo)
/* Calls foo() and prints the value it returns as a long; exits with 0. */
int main ()
{
long stack_size;
/* keep the result of foo() so it can be printed below */
stack_size = foo () ;
printf (" %ld\n", stack_size);
return 0;
}


,
, . ,
, . "" "" .
"".
.
Linux
.
"".
, .
, " ". , .
.
"
".

107

.
r e a d () w r i t e ()
, i o c t l ()
.
, ,
. .

sysfs.
, .
Linux
, .
,

(deprecated) (.. , ).
,
Linux .
2.3
2.5. .


,
(API). , Linux,

: , , .
, ,
.
! ,
.
, ,
, .
"" ""
.

108

, , .
. , ,
,
, . ,
, .

(polling).
. , , ,
,
. , .
(interrupt).

. , (
, ) , , .
,
.
, .
, . ,
, .

,
. .
, . ,
.

, . ,
, , ,
.
, . .
, ,
(interrupt request lines, IRQ lines).
. , PC IRQ, 0, , a IRQ, 1, .
. , PCI, ,
. ,
PCI, .
, , . ,
, "! ! !.

(exceptions) . , . , .
(, ) , (, - , page fault).
, , , .
, (, ), (, ).
.
, 86
. ,
. ,
, , .

110


, , (interrupt handler) -

(interrupt service routine). , ,


. ,
, , . - , .
Linux , .
, , . ,
, ,
,
(interrupt context), .
, , , .
, . ,
, ,
, . ,
,
, .
. ,
.
, , ,
. , .


, ,
, , ,
. ,
, .
(top half)
, ,
. ,
, ( )
(bottom half). , ,
.
.

111

, 7, " ".

.
, ,
. ,
. : ", ! !.
.
,
, , . ,
.
. , .


, . , ( ), .
.
/* request_irq: */
int request_irq(unsigned int irq,
irqreturn_t (*handler)(int, void *, struct pt_regs *),
unsigned long irqflags,
const char * devname,
void *dev_id);

, irq, . , , , , , , , . (probing)
.
, handler, , . ,
. -. i r q r e t u r n _ t .
.
, i r q f l a g s ,
.

112

SA_INTERRUPT. ,
. , Linux .
, , , ,
. : .
,
. ( )
, , . ,
, .
SA_SAMPLE_RANDOM. , ,
, . . , ,
, . ,
(, , ) (, , ).
,
.
.. , "
".
SA_SHIRQ. , (shared). , ,
.
. .
, devname, ASCII-, ,
. , "keyboard".

/proc/irq /proc/interrupts, .
, d e v i d , ,
. ( ), dev_id (cookie), .
,
.
, NULL,
,
(cookie) ( ISA,
, , ).

113

. (
), , , , .
r e q u e s t _ i r q () .
, .
-EBUSY, ,
( ,
SA_SHIRQ).
, r e q u e s t _ i r q ()
(sleep) , , , , .
, request_irq()
, .
, ,
r e q u e s t _ i r q ( ) - . . /proc/irq.
proc_mkdir ()
procfs. proc_create () p r o c f s ,
kmalloc () . 11, " ", kmalloc () . !

.
if (request_irq(irqn, my_interrupt, SA_SHIRQ, "my_device", dev)){
printk(KERN_ERR "my_device: cannot register IRQ %d\n", irqn);
return -EIO;
}

irqn , my_interrupt , ,
"my_device", dev dev_id.
, , .
, . .
, ,
.



void free_irq(unsigned int irq, void *dev_id)

114

, .
, , dev_id. ,
. , dev_id.
, , , f r e e _ i r q ( ) . ,
dev_id NULL, ,
.
free_irq() .
6 . 1 .

request_irq
f r e e _ i r q ()

()

.
,


.
static irqreturn_t intr_handler (int irq, void *dev_id, struct pt_regs *regs)

, , request_irq (). , irq, , .


, . ,
2.0, dev_id, i r q , , ,
(
).
, dev_id, , , request_irq () .
, ,
, , . , ()
(device structure) , , ,
, dev_id
.
, regs, ,
, . ,

.. _
115

. ,
. ,
, .

i r q r e t u r n _ t . : IRQ_NONE
IRQ_HANDLED. ,
, , , . ,
, , . , IRQ_RETVAL (x).
, IRQ_HANDLED, , IRQ_NONE. , () .
, , IRQ_NONE, . ,
, i r q r e t u r n _ t ,
int. , , .
2.6 void.
typedef i r q r e t u r n _ t
void 2.4 .
s t a t i c ,
.
, . , , .
, . ,
, .

Linux .
, ,
. , , . ,
.
.


(shared) ,
. , , .
SA_SHIRQ flags request_irq ().

116

dev_id .
, . ,
,
, ,
.
dev_id NULL!
, , . , .
,
, .
, , .
, , , .
request_irq () SA_SHIRQ, ,
SA_SHIRQ. , 2.6,
, "" SA_INTERRUPT.
, , . ,
,
. ,
. , (status register)
, .
.


,
RTC (real-time clock, ),
d r i v e r s / c h a r / r t c . . RTC , (PC). , ,
(alarm)
(periodic timer). ( ) -
(I/O range). .
: , .
RTC r t c i n i t ()

117

. . .
if (request_irq(RTC_IRQ, rtc_interrupt, SA_INTERRUPT, "rtc", NULL)) {
printk(KERN_ERR "rtc: cannot register IRQ %d\n", rtc_irq);
return -EIO;
}
,
RTC_IRQ, . ,
IRQ 8. , r t c i n t e r r u p t ,
SA_INTERRUPT. ,
" r t c " .
- , dev_id
NULL.
, .
/*
* .
* SA_INTERRUPT,
* set_rtc_mmss ()
* ( rtc
*
* ). ,
* - rtc_lock,
*
* . ( set_rtc_mmss()
* ./arch/XXXX/kernel/time.c)
*/
static irqreturn_t rtc_interrupt(int irq, void *dev_id, struct pt_regs *regs)
{
/*
* ,
* .
* ()
* ,
* rtc_irq_data
*/
spin_lock (&rtc_lock);
rtc_irq_data += 0x100;
rtc_irq_data &= ~0xff;
rtc_irq_data |= (CMOS_READ(RTC_INTR_FLAGS) & 0xF0);
if (rtc_status & RTC_TIMER_ON)
mod_timer(&rtc_irq_timer, jiffies + HZ/rtc_freq + 2*HZ/100);

118

spin_unlock(&rtc_lock);
/*
*
*/
spin_lock(&rtc_task_lock);
if (rtc_callback)
rtc_callback->func(rtc_callback->private_data);
spin_unlock(&rtc_task_lock);
wake_up_interruptible(&rtc_wait);
kill_fasync(&rtc_async_queue, SIGIO, POLL_IN);
return IRQ_HANDLED;
}

,
RTC. , -: ,
rtc_irq_data SMP-, rtc_callback.
9, " ".
r t c _ i r q data RTC m o d t i m e r (). 10,
" ".
, -, (callback), . RTC , ,
RTC.
IRQ_HANDLED,
, .

, RTC ,
IRQ_HANDLED.


,
. , , , , .
current . , ,
..

119

, . c u r r e n t ( , ).
,
(sleep) , ?
. ,
, , .
, .
.
- (busy loop) .
. , (, !).

. , .
.
, . 1.
, 8 32- 16 64- .
,
, . ,
,
.
2.6
, 4 32-
. ,
,
. ,
, .
. , , ,
.
, . .

- . ,
(idle task).
120


, , Linux . , ,
.
. 6.1 ,
.

handle_IRQ_event()

do_IRQ()

ret_from_intr()

. 6.1.


. ( ), .

. ( , ), , , ,

, . .

, . , .
IRQ .
( ).
do_IRQ (). , ,
,
.

121

do_IRQ() .
unsigned int do_IRQ(struct pt_regs regs)

, p t r e g s ,
. , do_IRQ () . 86 .
int irq = regs.orig_eax & 0xff;
, do_IRQ()
. PC,
mask_and_ack_8259A (), do_IRQ ().
do_IRQ () ,
, . ,
handle_IRQ_event (),
. 86
handle_IRQ_event () .
/*
 * handle_IRQ_event - run every handler registered on one interrupt line.
 * @irq:    interrupt number, passed through to each handler
 * @regs:   saved register state, passed through to each handler
 * @action: head of the linked list of registered actions for this line
 *
 * Local interrupts are re-enabled while the handlers run unless the first
 * action was registered with SA_INTERRUPT, and are always disabled again
 * before returning.  Returns the accumulated status word (1 OR-ed with the
 * flags of every action that was run).
 */
int handle_IRQ_event (unsigned int irq, struct pt_regs *regs,
		      struct irqaction *action)
{
	int status = 1;

	if (!(action->flags & SA_INTERRUPT))
		local_irq_enable ();

	/* Walk the action list, calling each handler in turn. */
	do {
		/* accumulate the flags so SA_SAMPLE_RANDOM can be tested below */
		status |= action->flags;
		action->handler (irq, action->dev_id, regs);
		action = action->next;
	} while (action);

	/* At least one action asked to contribute to the entropy pool. */
	if (status & SA_SAMPLE_RANDOM)
		add_interrupt_randomness (irq);

	local_irq_disable();
	return status;
}
, ,
SA_INTERRUPT . ,
SA_INTERRUPT , . . , . .
a d d _ i n t e r r u p t _ r a n d o m n e s s (),
SA_SAMPLE_RANDOM. , . , " ",
.

122

( do_IRQ () , ). do_IRQ ()
,
ret_from_intr().
r e t _ f r o m _ i n t r ( ) , , . , ( 4, " ", n e e d _ r e s c h e d ) .
(.. ), s c h e d u l e () .
(.. ),
s c h e d u l e () , p r e e m p t _ c o u n t
( ),
s c h e d u l e () ,
, .
86, , ,
a r c h / i 3 8 6 / k e r n e l / e n t r y . S ,
a r c h / i 3 8 6 / k e r n e l / i r q . . .

/proc/interrupts
procfs ,
/.
procfs ,
. //
i n t e r r u p t s , , ,

.
           CPU0
  0:    3602371    XT-PIC  timer
  1:       3048    XT-PIC  i8042
  2:          0    XT-PIC  cascade
  4:    2689466    XT-PIC  uhci-hcd, eth0
  5:          0    XT-PIC  EMU10K1
 12:      85077    XT-PIC  uhci-hcd
 15:      24571    XT-PIC  aic7xxx
NMI:          0
LOC:    3602236
ERR:          0

.
0-2, 4, 5, 12 15. ,
, .
. , .

123

, 3 . 6 0 2 . 3 7 1 2 ,
(EMU10K1) ( , , ). , . XT-PIC
PC (PC programmable
interrupt controller). I/ APIC IO-APIC-level
IO-APIC-edge. , , . dev_name
request_irq () , . , 4 ,
, .
, , procfs,
fs/proc. , /proc/interrupts,
show_interrupts () .


Linux .
.

<asm/system. h> <asm/irq. h>. . 6.2 .
, ,
, . ,
. ,
. ,

. Linux , ,
. . ,
. 8 9 .
.

10, " ", ,


( HZ) ?

124


(
)
.
local_irq_disable();
/* .. */
local_irq_enable();
(, , ). 86
l o c a l _ i r q _ d i s a b l e () c l i , l o c a l _
i r q _ e n a b l e () s t i . ,
86, s t i c l i , (set) (clear) (allow interrupt flag).
, .
l o c a l _ i r q _ d i s a b l e () , .
l o c a l _ i r q _ e n a b l e () , ( l o c a l _ i r q _ d i s a b l e ()) .
, ,
. ,
, , . ,
. , ,
. , , .
.
unsigned long flags;
local_irq_save(flags);
/* . . */
local_irq_restore (flags) ;
/* ..*/
, , f l a g s . ,
. ,
(SPARC), , f l a g s ( ,
). .

125

,
.
cli ()
,
. , - ,
, . c l i ( ) ,
s t i ( ) ; "86-" (
). 2.5, , ,
- ( 9, " ").
, ,
, , .
, , , c l i ( ) , . c l i () , (
) . , ,
c l i ( ) , ,
, c l i ( ) , ..
s t i ().
c l i () . -, . , ,
c l i (). -,
.
.


, .
. . , . Linux
.
void disable_irq(unsigned int irq);
void disable_irq_nosync(unsigned int irq);
void enable_irq(unsigned int irq);
void synchronize_irq(unsigned int irq);

. . , d i s a b l e _ i r q () ,
, , . ,
126

, , . d i s a b l e _ i r q _ n o s y n c () .
s y n c h r o n i z e _ i r q () , , , , .
, .. d i s a b l e _ i r q () d i s a b l e _ i r q _ n o s y n c ()
e n a b l e _ i r q ( ) . e n a b l e _ i r q ()
. ,
d i s a b l e _ i r q () ,
, e n a b l e _ i r q () .

(sleep). ! ,
(,
, ,
).
,
. , .
3.
PCI , . d i s a b l e _ i r q ()
, .


(,
,
).
i r q s _ d i s a b l e d (), <asm/system.h>, , . .
, .
in_interrupt()
in_irq()
. , .
, . i n _ i r q ( )
, .
3

, ISA, , . - ISA-
. PCI , PCI .
.

127

, , .. , .
, -,
, . i n _ i n t e r r u p t () ,
.
6.2.

local_irq_disable()

local_irq_enable()

local_irq_save(unsigned long flags)

local_irq_restore(unsigned long flags)

disable_irq(unsigned int irq)

disable_irq_nosync(unsigned int irq)

enable_irq(unsigned int irq)

irqs_disabled()

, ,

in_interrupt()

, ,

in_irq()

, ,

, !
, , .
, .
,
. ,
, , . , ,
, , -

128

, , .

, , . . 6.2
.
( ,
), .
.
. .
.

129


, , . ,
. , , . .

( ).
.

(
SA_INTERRUPT) .
.
,
.
,
.
,
. ,
, , . , ,
, .
.
(top half, )

, . (bottom half).


, .
, ,
(.. ) .
.
, , . .
, .
. , ,
, , . , , , , .
, .
, , , .
. ,
,
.
,
.
,
.
, ( ) , .
.

. , : "
, ". , ,
.

132


, . , ,
. , , SA_INTERRUPT,
( ). , ,
. , ,
, , . .
? , . ,
,
.
Linux, .
, . ( ) , . ,

.


,
,
. ,
. , .
Linux . ,
. , " ".
, Linux
2.6. , ,
.
, ,
, .

133

Linux
,
" " ("bottom half"). , .
"" , "bottom half ( ).
, .
32 .

32-
. , .. , . , ;
, .
(task queue) . .
,
. , ,
, , .
. , ,
. , "" ,
.
2.3 1 (softirq) (tasklet).

,
2.
- 32 ,
, .
, ,

, 3.

softirq , " ", , ( ) " ". (. .)


2
,
.
2.5 - .
3

task (). (softirq).

134

,
.
, . .
, , , . , . ,
. , ,
.
, , (software interrupt, softirq). ,
. , ,
, "" .
2.5 ,
. ,
(work queue). ,
.
, 2.6
: ,
. , .

. , , . , ,
- . , .
, , . 10, " ".


,
. .
" " ("bottom half") , ,
. Linux
. , , .

135


"soflirq", .
"Bottom Half" Linux. "", , " " ("bottom half") .

2.5.
: (softirq),
. softirq,
. . 7.1 .
7 . 1 .

2.5
2.5

2.3

2.3

2.3

, .

(softirq)
softirq. .
. softirq,
softirq . , , k e r n e l / s o f t i r q . .


. , . softirq_action,
< l i n u x / i n t e r r u p t . h > .
/*
* ,
*/
struct softirq_action
{

136

void (*action)(struct softirq_action * ) ;


/* , */
void *data;
/* */
};

32 kernel/sof t i r q .
.
static struct softirq_action softirq_vec[32];

. , 32
softirq. , . softirq . 32
4.

softirq
, action, .
void softirq_handler(struct softirq_action *)

, action
softirq_action
. , my_softirq
softirq_vec, - .
my_softirq->action(my_softirq)

, ,
, data.
. data
data.
softirq. , , softirq, .
( ) .


,
.
(rise softirq).
.
. .
4

,
softirq, .

137

.
ksoftirqd.
, , , , .
, ,
do_softirq (). -
. , do_softirq()
.
do_sof t i r q ().
u32 pending = softirq_pending(cpu);
if (pending) {
struct softirq_action *h = softirq_vec;
softirq_pending(cpu) = 0;
do {
if (pending & 1)
h->action (h);
h++;
pending >>= 1;
} while (pending);
}

.
.
pending , softirq_pending (). 32- . n, .
, 5.
h softirq_vec.
, pending, ,
h->action(h).
h , softirq_vec.
,
pending, .
. ,
..
5
, . ,
( ),
.

138

h ,
. .
,
. , . ,
, , h
softirq_vec, pending 32
32 .



.
SCSI softirq. ,
. , , .
,
. ,
. , , , softirq .


(enum) <linux/interrupt.h>. , , . .

(enum). , . , . , HI_SOFTIRQ
, a TASKLET_SOFTIRQ . ,
, -
TASKLET_SOFTIRQ. . 7.2 .
7.2.

HI_SOFTIRQ

TIMER_SOFTIRQ
NET_TX_SOFTIRQ
NET_RX_SOFTIRQ
SCSI_SOFTIRQ

1
2
3
4

TASKLET_SOFTIRQ




SCSI

139


o p e n _ s o f t i r q ( ) , : , -
d a t a . , , .
open_softirq(NET_TX_SOFTIRQ, net_tx_action, NULL);
open_softirq(NET_RX_SOFTIRQ, net_rx_action, NULL);
(sleep).
.
.
, ,
, . ,
, , , , (
). , .
.
,
, . ,
,
( , ), - ,
.
.
, . ,
.



o p e n _ s o f t i r q () , .
, ,
d o _ s o f t i r q ( ) , r a i s e _ s o f t i r q ( ) .
, .
raise_softirq(NET_TX_SOFTIRQ);
NET_TX_SOFTIRQ.
n e t _ t x _ a c t i o n () .
, , .

140

, raise_sof t i r q _ i r q o f f (), .
/*
* !
*/
raise_softirq_irqoff(NET_TX_SOFTIRQ);

.
, , . d o _ s o f t i r q ( ) . ,
. "
" " ".

, . , (task).
.
.
, , :
. ,
.
, . .


, (softirq). , : HI_SOFTIRQ TASKLET_SOFTIRQ.
, HI_SOFTIRQ TASKLET_SOFTIRQ.


t a s k l e t _ s t r u c t . .
<linux/interrupt.h> .
struct tasklet_struct {
struct tasklet_struct *next;
unsigned long state;

/*
*/
/* */

141

atomic_t count;
/* */
void (*func) (unsigned long); /* - */
unsigned long data; /* - */
};
f u n c - ( a c t i o n
, ), d a t a
.
s t a t e : , TASKLET_
STATE_SCHED TASKLET_STATE_RUN. TASKLET_STATE_SCHED
, , TASKLET_STATE_RUN
. TASKLET_STATE_RUN ,
, (,
, , , ).
c o u n t .
, ; ,
,
.


(scheduled) ( ) 6 ,
: t a s k l e t _ v e c ( )
t a s k l e t _ h i _ v e c ( ).
t a s k l e t _ s t r u c t .
t a s k l e t _ s t r u c t .

t a s k l e t _ s c h e d u l e () t a s k l e t _ h i _ s c h e d u l e ( ) , t a s k l e t _ s t r u c t .
( ,
TASKLET_SOFTIRQ, HI_SOFTIRQ). . t a s k l e t _ h i _ s c h e d u l e (), .
, s t a t e TASKLET_STATE_
SCHED. , .

. ,
.
, , t a s k l e t _ v e c t a s k l e t _ h i _ v e c ,
.
6

. (softirq) (rise), (lasklet) (schedule)? ? ,


.

142

TASKLET_SOFTIRQ HI_
SOFTIRQ,
d o _ s o f t i r q ( ) .

.
d o _ s o f t i r q ( ) ,
. , , , d o _ s o f t i r q () , . TASKLET_SOFTIRQ
HI_SOFTIRQ , d o _ s o f t i r q ()
. ,
t a s k l e t _ a c t i o n () t a s k l e t _ h i _ a c t i o n () - , .
, t a s k l e t _ v e c t a s k l e t _
hi_vec .

.
( , ,
).
.
,
,
TASKLET_STATE_RUN. ,
(,

).
, TASKLET_STATE_RUN,
.
c o u n t , ,
. ( c o u n t ),
, .
, ,
( ) c o u n t . . ,
TASKLET_STATE_RUN s t a t e .
,
, .

143

, . ,
TASKLET_SOFTIRQ
HI_SOFTIRQ. ,
. , , ,
. ,
(
).
.


,
. , , .


. , , ( )
: . ( , )
, <linux/interrupt.h>;
DECLARE_TASKLET(name, func, data)
DECLARE_TASKLET_DISABLED(name, func, data);
s t r u c t _ t a s k l e t _ s t r u c t
(name). , func, d a t a . ( c o u n t ) .
, c o u n t , , ,
. count, , , , .
.
DECLARE_TASKLET(my_tasklet, my_tasklet_handler, dev);
.
struct tasklet_struct my_tasklet = { NULL, 0, ATOMIC_INIT(0),
tasklet_handler, dev };
m y _ t a s k l e t ,
. t a s k l e t _ h a n d l e r .
d e v -
.
, s t r u c t
t a s k l e t _ s t r u c t * t , .
t a s k l e t _ i n i t ( t , t a s k l e t _ h a n d l e r , dev); /* , */
144

-
- .
void tasklet_handler(unsigned long data)
,
(). , , . ,
(,
), . ,
,
.
, (. 8, "
" 9, " ").


,
t a s k l e t _ s c h e d u l e (),
t a s k l e t _ s t r u c t .
tasklet_schedule(&my_tasklet) ; /* , my_tasklet
*/
,
. , , , ,
. , ,
, . ,
, .
t a s k l e t _
d i s a b l e ( ) . ,
, .
t a s k l e t _ d i s a b l e _ n o s y n c ( ) , , , . , ,
. t a s k l e t _ e n a b l e ()
. , , DECLARE_TASKLET_DISABLED (),
.
tasklet_disable(&my_tasklet); /* */
/* , , , . */
tasklet_enable(&my_tasklet); /* */
, ,
t a s k l e t _ k i l l ( ) . -

145

t a s k l e t _ s t r u c t .
,
, .
, , . , , ,
. , .

ksoftirqd
(softirq) , ,
(
).
, .
,
, .
(,
, ). , -
( ). , -
, ( ,
). ,
, ,
. ,
. , ,
.
.
,
,
. , , ,
. ,
, .
- .
,
, . ,
. ,
, .
-
.

.
146


. -
, , . , ,
, ,
, ( ) . , . , ,
, . ,
, ,
, .
- . , ,
.
, , (wake up) , .
( nice 19). ,
- . - . . , ,
. , ,

( ).
.
k s o f t i r q d / n , . k s o f t i r q d / 0 k s o f t i r q d / 1 . To, , ,
, .
, , .
for (;;) {
if (!softirq_pending(cpu))
schedule();
set_current_state(TASK_RUNNING);
while (softirq_pending(cpu)) {
do_softirq();
if (need_resched())
schedule();
}
set_current_state(TASK_INTERRUPTIBLE);
}

147

, (
s o f t i r q _ p e n d i n g ( ) ) , k s o f t i r q d d o _ s o f t i r q () , . , , .
s c h e d u l e (), .
, TASK_INTERRUPTIBLE
.
, d o _ s o f t i r q () ,
.


, , 2.6,
. ,
, , , , . 2.6,
.
, .
, 32.
,
. . , 32, .
, , .
, . ,
,
. , .
, ,
. , 2.4 . , 32, ,
< l i n u x / i n t e r r u p t . h > . , mark_bh ()
. 2.4 ,
b h _ a c t i o n ( ) . 2.4 , .
,
(task queue),
. ,
.
148

2.3 (softirq)
(lasklet), .
. ,

,
7.
2.5 , SCSI ( , ) - . , .
, !


(work queue)
, .
. ,
, ,
, . ,
, , (sleep).
, : /, .
,
.
, .
.
(
), . .
, ,
. , , , ,
,
. , , ,
-. , .
7

,
. , , ,
? , .

149



,
, - . (worker threads).
, . ,
,
.
, .
, , events/n,
. .
, events/0. events/1. ,
, , . , ,
,
. , ,
, .
.
, . ,
, . , ,
.


workqueue_
struct.
/*
 * A work queue as seen from outside: an array holding one
 * cpu_workqueue_struct per possible processor (NR_CPUS entries),
 * plus a name and a list node.
 */
struct workqueue_struct {
struct cpu_workqueue_struct cpu_wq [NR_CPUS] ;
const char* name;
struct list_head list;
};

s t r u c t cpu_workqueue_struct, o
. , ,
, .

150

cpu_workqueue_struct kernel/workqueue.
. .
/*
* , :
*/
struct cpu_workqueue_struct {
spinlock_t lock; /* */
long rernove_sequence;

/*
( ) */
long insert sequence; /* */
struct list_head worklist;
/* */
wait_queue_head_t more_work;
wait_queue_head_t work_done;
struct workqueue_struct *wq;

/*
workqueue_struct */
task_t *thread; /* */
int run_depth;

/* run_workqueue() */

};
, ,
workqueue_struct.
c p u _ w o r k q u e u e _ s t r u c t , ,
,
.


, w o r k e r _ t h r e a d ().
. - , . , ,
.
w o r k _ s t r u c t , < l i n u x / w o r k q u e u e . h > .
.
/*
 * One unit of deferred work.  run_workqueue() takes these off a
 * cpu_workqueue_struct's worklist, clears bit 0 of `pending`, and calls
 * func(data).
 */
struct work_struct {
unsigned long pending; /* is this work pending? (bit 0 is cleared before the handler runs) */
struct list_head entry; /* node in the worklist */
void (*func)(void * ) ; /* handler function */
void *data;
/* argument passed to the handler */
void *wq_data; /* NOTE(review): presumably used internally by the work queue code - confirm */
struct timer_list timer; /* NOTE(review): presumably the timer used for
delayed work - confirm */
};

151

,
. , , ,
. , , .
w o r k _ s t r u c t .
, .
worker_thread () .
for (;;) {
set_task_state(current, TASK_INTERRUPTIBLE);
add_wait_queue(&cwq->more_work, &wait);
if (list_empty(&cwq->worklist))
schedule();
else
set_task_state(current, TASK_RUNNING);
remove_wait_queue (&cwq->more_work, &wait);
if (! list_empty (&cwq->worklist))
run_workqueue(cwq);
}
.
( TASK_INTERRUPTIBLE),
.
, schedule ()
.
, . TASK_RUNNING
.
, run_workqueue () .
1

run_workqueue ()
run_workqueue () , .
while (!list_empty(&cwq->worklist)) {
struct work_struct *work;
void (*f) (void * ) ;
void *data;
work = list_entry(cwq->worklist.next, struct work_struct, entry);
f = work->func;
data = work->data;
list_del_init(cwq->worklist.next);
clear_bit(0, &work->pending);
f(data);
}

152


, func workqueue_struct. .
, .
( func), ,
( d a t a ) .

.
.
.

,
, . . 7.1 , .
. .
.
. events ().
c p u _ w o r k q u e u e _ s t r u c t . w o r k q u e u e _ s t r u c t
.

cpu_workqueue_struct

workqueue_struct

work_struct

. 7.1. , ,

153

, ,
events falcon.
. , events (,
c p u _ w o r k q u e u e _ s t r u c t ) falcon ( c p u _ w o r k q u e u e _ s t r u c t ) .
events w o r k q u e u e _ s t r u c t ,
falcon .
. , .
w o r k _ s t r u c t . ,
, .
. .
, events. .
,
. , XFS
.


. , , events, .


, .

.
DECLARE_WORK(name, void (*func) (void *), void *data);
w o r k _ s t r u c t name, func - d a t a .

, .
INIT_WORK(struct work_struct *work, void (*func)(void *),void *data);
,
work, - func d a t a .


.
void work_handler (void *data)

154

, , , .
. ,
. , ,
, , .
, , ,
, , , .
, ,
. . .


, , . events, ,
.
schedule_work(&work);

,
events, ,
.
, , .
. .
schedule_delayed_work (&work, delay);

, w o r k _ s t r u c t ,
&work, , delay
. , , 10, " ".


, , , . , ,
, .
, , , , . , , .
, , events .
void flush_scheduled_work(void);

155

, events
. ,
.
.
, . ,
schedule_delayed_work () ,
flush_scheduled_work ( ) .

int cancel_delayed_work(struct work_struct *work);
, w o r k _ s t r u c t , .


, , .
, ,
.

.
struct workqueue_struct *create_workqueue(const char *name);
name , .
, e v e n t s , , .
struct workqueue_struct *keventd_wq = create_workqueue("events");
( ),
.
,
. , , s c h e d u l e _ w o r k ( ) s c h e d u l e _ d e l a y e d _ w o r k ( ) , , , , .
int queue_work(struct workqueue_struct *wq, struct work_struct *work);
int queue_delayed_work(struct workqueue_struct *wq,
struct work_struct *work, unsigned long delay);
,
flush_workqueue(struct workqueue_struct *wq);
f l u s h _ s c h e d u l e d _ w o r k ( ) ,
, , ,
.
156


, (softirq) (tasklet),
(task queue). ( tq), ,
(task), 8. ,
, 2.5.
, . ,
, . ,
, .
.
, scheduler queue ( ), immediate queue
( ) timer queue ( ). . keventd
, .
. ,
, "" . . , .
, . , , . ,
,
.
. , , . , , .
; . ,
. , keventd
, , .



, , . 2.6 :
(softirq), (tasklet)
(work queue). ,
8

, ,
, .

157

. , .
,
. ,
, .
, , , , , ,
. , ,
. ,
.
, ,
. ,
.
, , , , , , , (-CPU data), ,
.
,
.
, ,
(sleep), , , . ,
, , . , , , , .
.
. events, , . , .
, .
. 7.3 .
7.3.

(softirq)

(tasklet)


(work queue)

( )

158

,
. , , ..
- ? ,
. .
, .



, .
. , , . ,
. , , , .
, :
. , , .
( , ) .

( ), .
, , , ,
. , SMP , .
, ,
, .

SMP-.
, , .
, .
8 , . 9
, .

159

, , .


.
, , . , , 9.
.
( , , ),
local_bh_disable (). local_bh_enable (). , "" .
, . . 7.4
.
7.4.

v o i d local_bh_disable ()
v o i d local_bh_enable ()

(softirq)
(tasklet)
(softirq)
(tasklet)

local_bh_enable () . ,
local_bh_disable () . local_bh_disable ()
, .
, local_bh_enable ()
.
preempt_count, (, )11.
, .
local_bh_enable () , .

11

,
. Linux . ,
, , , , (sleeping-while-atomic bug).

160

, ,
<asm/softirq.h>.
.
/*
 * local_bh_disable - add SOFTIRQ_OFFSET to the current thread's
 * preempt_count.  local_bh_enable() subtracts the same amount, so
 * calls may be nested.
 */
void local_bh_disable(void)
{
struct thread_info *t = current_thread_info();
t->preempt_count += SOFTIRQ_OFFSET;
}

/*
 * local_bh_enable - undo one local_bh_disable().
 *
 * Subtracts SOFTIRQ_OFFSET from the current thread's preempt_count.
 * If the count has dropped to zero and softirq_pending() reports work
 * for this processor, do_softirq() is called right away.
 */
void local_bh_enable(void)
{
	struct thread_info *t = current_thread_info();

	t->preempt_count -= SOFTIRQ_OFFSET;

	/*
	 * Is preempt_count now zero and is any softirq pending?
	 * If so, run the pending softirqs.
	 */
	if (unlikely(!t->preempt_count &&
		     softirq_pending (smp_processor_id())))
		do_softirq();
}
. , .
"" (, ), .
, . 8 9.

161

, Linux, (softirq),
(tasklet) (work queue). ,
. ,
, , . , , Linux:
task queue.
, , . , ,
.
:
. ,
. , .

162

,
(shared memory), , . .
, 1
: ,
, , (, ) .

, , , . .
. , Linux , . ,
,
. , .
.
2.0, .
,
. , , ,
1

. , ,
, . . , .

. 2.6 Linux (). , (


) . , ,
.
, .
, Linux
(race condition, "").



, , (critical region).
.
, .. , , - ,
.
, .
, , (
"", race condition). ,
. ,
,
,
. , , ,
, .


, , .
;
( ATM, Automated Teller Machine, -).
,
. , , PIN, , ,
, ,
.
, , . , -

164

. , , .
int total = get_total_from_account(); / * */
int withdrawal = get_withdrawal_amount(); /* ,
*/
/* , */
if (total < withdrawal)
error(" !");
/* , :
*/
total -= withdrawal;
update_total_funds(total) ;
/* */
spit_out_money(withdrawal);
, . , . ,
, - ,
- (
), - .
, , ,
: , ,
, , .
. , $100, $10, , ,
( : ,
). , ,
$105. ,
.
, - : .
$105, , $105 $10, $95, $10 .
, , $95 $100.
, .
,
. , : $105
$100 $10.
$100 $105 $5.
$10 $105, $95. : $5. , $95.
!

165

, ,
. ,
. .


, .
:
:

i++
.
i .
, .
i .
, ,
, i 7.
( ).
1
i (7)
i 1 (7->8)
i (8)

i (8)
i 1 (8->9)
i (9)

, , 7,
9. .
1
i (7)
i 1 (7->8)
i (8)

2
i (7)
i 1 (7->8)
i (8)


i , 1,
. i
8, 9.
. ,
. ,
, 1
166

, . .
.
1
i 1 (7->8)

i 1 (8->9)
.
1

2
i 1 (7->8)

i 1 (8->9)
. . .
, . .

, . , ,
. ,
, ,
. :
, . ,
, .
, , .
, , , . ,
.
, .
, .
, ? ,
,
, . ,
; ,
(lock) , .
(lock) . ,
. , , , .
. ,
. ,
, .

167

, , , ,
, . ,
.
. ,
.
.
, .
.
. .
,
.
.
, , . ,
.
1
2

:
...

: ...
...
...

:
.,.

...

, (, advisory)
(, voluntary).
, .
,
. .
"" "".
Linux . ,
( , contended lock).
, ,
(busy wait2), , .
2

, "" (spin) , .

168

, Linux,
.
: " ,
. ,
, !"
, , , . , , ,
. , , , test-andset ( ),
,
. .


, , .. .

, ,
. ,
(, , ), . , .
, ,
, , (pseudo-concurrency).
. , (true concurrency). ,
.
.
. ,
, , .
. softirq
,
.
. , , , , .

169

.
, , , .
. .
, . ,
, , . ,
, .
.
, . , , , .

,
. , .
,
, , . , . -
. ,
, .
.
.
, , (iterrupt-safe). ,
, SMP- (SMP-safe). , ,
3
(preempt-safe). ,
,
.


, . ,
, . ,
, , , . , ,
, ,
3

, , , , SMP-, .

170

. , , ( , ,
), , . ,
, (
).
?
. : , ,
, - . - - .
, , .
: SMP UP
Linux ,
"" . (SMP),
C O N F I G _ S M P . {uniprocessor, UP) , , , ,
C O N F I G _ S M P , , , . , , -. C O N F I G _ P R E E M P T ( , , ).
,
, . C O N F I G _ S M P C O N F I G _ P R E E M P T
.
, .

.
? ,
, ?

? ?
, , , ?
()
- ? , ?
, ?
, ?
?
, , .

171


( , deadlock) ,
,
. ,
. , .
, , . ,
, ;
.
4 (self-deadlock).
, , , . , ,
.


,
...
n n . , ,
, ,
. ,
ABBA (ABBA deadlock).
1


,
,
. deadly embrace (.
).
. , ,
. .
. .
(deadly embrace). , .
4

, .
Linux, , . . ,
.

172

. : "
'?'''. - ,
- ?
.
, .
. ,
. , c a t , dog fox,
. ,
,
, . ,
, .
: c a t , dog fox, ( ) . ,
fox, dog, ( , ),
dog fox. , .
1
cat
dog
fox
fox

2
fox
dog
dog

1 fox, 2,
2 dog,
1. , , , . , .
,
. , . - .
/*
* cat_lock - dog
* ( dog fox)
*/
,
,
.
. Linux ,
. .

173



" " (lock contention, contention)
,
. (highly contended) ,
. , ,
.
, . ,
, "" ,
.
(scalability) , . , , , ,
.
,
. . , , .
Linux , 2.0. ,
Linux ,
.
2.2 , . 2.4 .
2.6 , .
(, granularity)
, ,
. , (fine grained), , . ,
, ,
.
(cource graine),
,
.

(runqueue), 4, " ". 2.4
(,

174

). 2.6 (1)-,
, . ,
. , , . ,
,
.
,
Linux
. ""
,
,
. .
.

, .
. ,
, . ,
. ,
? , ? (: .) ,

SMP-, ?
, , ,
.
. .

.
. , ,
. .
. .

175


SMP- , . , , -
. , , , , .
SMP,
, . , .
, ,
, , ,
Linux,
.

176

,
. , Linux . ,
, . , .


(atomic operations) ,
, .. .
, .
, ,
. , , , , (,
i 7).
1
i (7->8)

2
i (8->9)

9 . . ,
.
: ,
. , Linux.
,

( , ). - , SPARC,
.



a t o m i c t . , ,
i n t , . -, , , a t o m i c t , ,
.
, , . ,
, , .
atomic_t ,
( , ) . ,
,
. , a t o m i c t .
, atomic_t 32- , Linux, ,
24 . SPARC,
: 8 32- i n t ,
. 9.1.
32- atomic_t

24-
() 31

. 9.1, 32- atomic_t SPARC

, SPARC . , SPARC 24 . ,
32- , ,
SPARC, . ,
SPARC atomic_t, 178

32- , . 24-
SPARC,
<asm/atomic.h> .
,
, <asm/atomic.h>. ,
,
, .
,
.
a t o m i c _ t .
.
. atomic_t u; /* */
atomic_t v = ATOMIC_INIT(0); /* v
*/
.
atomic_set(&v, 4 ) ; /* v = 4 () */
atomic_add(2, &v) ; /* v = v + 2 = 6 () */
atomic_inc(&v); /* v = v + 1 = 7 () */
a t o m i c _ t i n t , a t o m i c _ r e a d ( ) .
printk("%d\n", atomic_read(&v)); /* "7" */

. , a t o m i c _ i n c ()
a t o m i c _ d e c ( ) , .
. ,
int atomic_dec_and_test(atomic_t *v)

. ,
t r u e , f a l s e . (.. , )
. 9.1. , , <asm/atomic.h>.

(
i n l i n e ) . - , . ,
.
, ,

179

.. ,
atomic_read() , atomic_t.
9 . 1 .

ATOMIC_INIT(int i)

i
a t o m i c _ t

int atomic_read(atomic_t *v)

void atomic_set (atomic_t *v, i n t i)

v i

void atomic_add (int i, atomic_t *v)

i v

void atomic_sub(int i, atomic_t *v)

1 v

void atomic_inc(atomic_t *v)

void atomic_dec(atomic_t *v)

int atomic_sub_and_test(int i, atomic_t *v)

i v
t r u e , ,
f a l s e

int atomic_add_negative(int i, atomic_t *v)

i
v t r u e ,
, f a l s e

i n t atomic_dec_and_test (atomic_t *v)

v
t r u e ,
, f a l s e

int atomic_inc_and_test(atomic_t *v)

v
t r u e ,
, f a l s e


. , .
. , : , , ,
, , . , 42, 365,
42 365, .
.
, ,
. ,
(ordering). ,
, . , ,
, .
, , .
(barrier),
.

180

, , .
,
. , ,
.


, , .
,
< a s m / b i t o p s . h > .
, , , . . 0 ,
. 32- 31
, 0 .
, , 0 31 ( 63
64- ).
,
a t o m i c _ t , .
. .
unsigned long word = 0;
set_bit(0,&word);
/* 0 */
set_bit(1, &word);
/* 1 */
printk("%lu\n", word); /* prints "3" */
clear_bit(1, &word);
/* 1 */
change_bit(0, &word); /* 1,
*/
/*
() */
if (test_and_set_bit(0, &word)) {
/* ... */
}
. 9.2.

. , , ,
. , t e s t _ b i t ()
_ _ t e s t _ b i t ( ) . ,
, , ,
.

181

9.2.

void set_bit (int nr, void *addr)

n r - , addr

void clear_bit (int nr, void *addr)

n r - ,
a d d r

void change_bit ( i n t nr, void *addr)

n r - , addr,

i n t t e s t _ a n d _ s e t _ b i t ( i n t n r , v o i d *addr)

n r - , addr,

int test_and_clear_bit (int nr, void *addr)

nr - , addr,

int test_and_change_bit (int nr, void *addr) nr - , addr,




int test_bit (int nr, void *addr)

n r - , a d d r


, , , . , .
, ? ,
, - ? , ,
, , ?
, ? , , , . , , , .
, ( , , ).
, , ,
.
, : ,
. , .
. , , . , .
, , ,
.
, .

182

,
( ) , , addr:
int find_first_bit(unsigned long *addr, unsigned int size)
int find_first_zero_bit(unsigned long *addr, unsigned int size)

, .
.
, __f f s() _ffz(),
,
.
, , , ,
. ,
.
,
,
.

-
, ,
, . . , ,
,
. .
,
. , ,
(lock).
Linux - -
(spin lock). - ,
. , (contended), .. ,
(busy loop) ""
(spin), . ,
. , .
,
,
, , - .

183

, -, , , , (, , ), .
- .
- ,
. ,
, ,
, . .
,
. , . -,
.
, , 1. (semaphore)
, ,
, , ,
, .
- . <asm/spinlock.h>.
<linux/spinlock.h>. -.
spinlock_t mr_lock = SPIN_LOCK_UNLOCKED;
spin_lock (&mr_lock);
/* ... */
spin_unlock(&mr_lock);


. , .
. , , , , . ,
() . , .

, .
, , () .

184

: - !
, -
Linux . , ,
, , ,
. ,
, , (). !

- (
, ).
, , ( - ),
( ). , , ,
.
(spin), .
, , , ,
.
( ), . ,
.
( , ) ,
, , , .
,
. .
spinlock_t mr_lock = SPIN_LOCK_UNLOCKED;
unsigned long flags;
spin_lock_irqsave(&mr_lock, flags);
/* ... */
spin_unlock_irqrestore(&mr_lock, flags);

s p i n _ l o c k _ i r q s a v e ()
, .
spin_unlock_irqrestore (), , . , , . ,
flags . , .
, , .
.

185


, , . , . , ,
, . ,
. . , s t r u c t f o o
f o o _ l o c k .
. , ,
. , ,
,
.

, , .
. spin_lock_irq() spin_unlock_irq().
spinlock_t mr_lock = SPIN_LOCK_UNLOCKED;
spin_lock_irq(&mr_lock);
/* ... */
spin_unlock_irq(&mr_lock);

, . s p i n l o c k _ i r q ().
, , , , ,
, .
-
C O N F I G _ D E B U G _ S P I N L O C K -. , - - ,
.
-.

-
spin_lock_init () -,
( spinlock_t,
, ).
spin_trylock () -. , ,
, . ,
. spin_is_locked () ,

186

. .
2
.
. 9.3 -.
9.3. -

spin_lock()
spin_lock_irq()
spin_lock_irqsave()

spin_unlock()

spin_unlock_irq()



spin_unlock_irqrestore()
spin_lock_init()
s p i n l o c k _ t

spin_trylock()

spin_is_locked()
,
,

-
7, " ",
. spin_lock_bh() . spin_unlock_bh() .
, , , , .
, , .
, (tasklet) . , .
2

, "". - - , , . , , .

187

, - , .
, , .
(softirq), ,
, , , .
, , , .
, , .

- -
(reader lock) (writer lock).
, . ( ), .
. , ( ), ,
. ( 3,
" ") . , - (reader-writer spin lock).
/, ,
. Linux - -. -
- .
, , . , ,
, ,
. -
shared/exclusive (/ ) concurrent/
exclusive (/).
-
.
rwlock_t mr_rwlock = RW_LOCK_UNLOCKED;
.
read_lock(&mr_rwlock);
/* ( ) ... */
read_unlock(&mr_rwlock);

188

, .
write_lock(&mr_rwlock);
/* ( ) ... */
write_unlock(&mr_rwlock);

, .
, , , "" , .
read_lock(&mr_rwlock);
write_lock(&mr_rwlock);
, , ,
, ; . - , .
, , ,
, . -.

-. .
. , ""
.
r e a d _ l o c k () r e a d _ l o c k _ i r q s a v e ().
,
w r i t e _ l o c k _ i r q s a v e ( ) ,
. . 9.4 -.
, - Linux, ,
. , , ,
. , , , .
"" .
.
- .
,
(, ).
, .

189

9.4. - -

read_lock ()

read_lock_irq()

read_lock_irqsave()

read_unlock()

read_unlock_irq ()

, ,

read_unlock_irqrestore () , ,
write_lock()

write_lock_irq()

write_lock_irqsave ()

write_unlock ()

write_unlock_irq ()

, ,

write_unlock_irqrestore()

, ,

write_trylock()

rw_lock_init()

rwlock_t

rw_is_locked ()

, ,

Linux (semaphore) , .
, , (wait queue) (sleep). 3, , ,
.
. ,
, , . , , .
.
3

, .

190

, , , . - ,
, ,
. , (.. ) (..
) (.. ) .
, , , (.. ) (..
), . , -,
.
, -, .
,
, .
, ,
, , , .
, , , "" ,
.
, . .
( )
.
,
( , ).
-,
, ,
- .

-. ,
, . ,
, .
, , , .
-,
. , , , . , ,

191

. , ,
, , , , .
, ()
.
, , .
-
, ,
, . (usage count) (count).
, , , , -.
(binary semaphore) (
)
(mutex, ) ( mutual exclusion). , ,
. (counting
semaphore, -), , , .
- , .
. . ,
, , ( , ).
4 (Edsger Wybe
Dijkstra) 1968 .
() V ( ) ,
Proben () Verhogen ( ). down () up () .
Linux . down ()
,
. , . , - .
(down) ,
. up () , .
(upping) .
4

(1930-2002 .) (, ) .
,
. , ,
15 . ,
GOTO Linux.
i

192

.
, .



<asm/semaphore.h>. s t r u c t semaphore . .
static DECLARE_SEMAPHORE_GENERIC(name, count);
name , a c o u n t .
(mutex), , .
static DECLARE_MUTEX(name);
name . , . ,
,
sema_init(sem, count);
sem , a count .


init_MUTEX(sem);
, "mutex" init_MUTEX()
"init" , s e m a _ i n i t () . , , . ,
7 ,
.


d o w n _ i n t e r r u p t i b l e () .
,
TASK_INTERRUPTIBLE. 3 ,
,
.
, , , d o w n _ i n t e r r u p t i b l e () -EINTR.
down ( ) , TASK_UNINTERRUPTIBLE. , ,
, . d o w n _ i n t e r r u p t i b l e ()
, down ( ) . , , , .

193

down_trylock ()
. , .
.
up ( ) .
.
/* mr_sem
, 1 */
static DECLARE_MUTEX(mr_sem);
if (down_interruptible(&mr_sem))
/* */
/* ... */
/* */
up(&mr_sem);
. 9.5.
9.5.

sema_init(struct

init_MUTEX(struct

semaphore *, int)

semaphore *)

init_MUTEX_LOCKED (struct semaphore *)

0 (.. )

down_interruptible(struct semaphore *)

,

(contended)

down(struct semaphore *)

,

(contended)

down_trylock(struct semaphore *)

,

(contended)

up(struct semaphore *)

194

-
, -, -.
, - - -.
- s t r u c t rw_
semaphore, <asm/rwsem.h>.
-
static DECLARE_RWSEM(name);
name .
-, , .
init_rwsem(struct rw_semaphore *sem)
- (mutex), ..
.
,
. , , , . -
,
down (). .
static DECLARE_RWSEM(mr_rwsem);
/* */
down_read(&mr_rwsem);
/* ( ) ..

*/

/* */
up_read(&mr_rwsem);
/* ... */
/* */
down_write(&mr_rwsem);
/* */
/* ( ) ... */
up_write(&mr_rwsem);
.
down_read_trylock () down_write_
t r y l o c k ( ) . -. , , , .
, !
- ,
- -. d o w n g r a d e w r i t e r (), -

195

, , , .
-, - ,
, ,
, , .
-
, ,
.

-
, -, .
. -, , . . 9.6 ,
.
9.6. : -

(low overhead)



(steep)


(conditional variable, completion variable) , , , ,
. ,
. , , .
, , . , ,
. , vfork()
.
struct completion,
<linux/completion.h>.

196


DECLARE_COMPLETION(mr_comp);

init_completion ().
, ,
wait_for_completion () . ,
complete () , , . . 9.7
.
. 9.7.

init_completion(struct completion *)

wait_for_completion(struct completion *)

complete(struct completion *)

k e r n e l /
sched. k e r n e l / f o r k . . ,
, . ,
, w a i t _ f o r _
completion(). ,
complete().

BKL:
" " . (Big
Kernel Lock, BKL) -, , SMP n Linux . BKL
.
BKL .
, ,
, . ,
, BKL, .
BKL .
, , -.
BKL .
BKL .

197

2.0
2.2. 2-0 SMP,
(, ). 2.2

. BKL ,
. , 5.
BKL . , BKL. . . BKL ,
-, , . lock_kernel () , unlock_kernel () .
,
unlock_kernel ().
. k e r n e l l o c k e d ()
, , . <linux/smp_lock.h>.
.
lock_kernel();
/*
* ,
* BKL...
* ,
* .
*
* .
* , ,
* - ,
* !
*/
unlock_kernel();

, .
, , BKL
. . 9.8 BKL.

, , , , " " .

198

9.8.

lock_kernel()

BKL

unlock_kernel()

BKL

kernel_locked()

, ,
-

, ,
, . BKL
(, " foo () "),
(" foo "). , BKL
- , ,
- . ,
,
, .


(seq lock) , 2.6. .
.
-, .
, , . .
, ,
,
( , , ).
.
seqlock_t mr_seq_lock = SEQLOCK_UNLOCKED;
, , .
write_seqlock(&mr_seq_lock);
/* ... */
write_sequnlock(&mr_seq_lock);
, -.
, .
unsigned long seq;
do {
seq = read_seqbegin(&mr_seq_lock);
/* ... */
} while (read_seqretry(&mr_seq_lock, seq));

199


, .
,
.
, . , ,
- -. , ,
, (
) , , .


, , , ,
. , , . , ,
, - , "". - , . , , SMP
, , SMP,
.
, .
, -,
.
- , (-processor data).
, , -, . - ,
,
.

foo


foo


foo

, . - (
).
, .

200

p r e e m p t _ d i s a b l e () . , ..
.
preempt_enable ().
preempt_enable() , .
preempt_disable();
/* ... */
preempt_enable();

,
preempt_disable(). ,
. ,
. . preempt_count ()
. . 9.9
.
9.9.

preempt_disable()

preempt_enable()

preempt_enable_no_resched()

preempt_count()

,
, (
, )
get_cpu (). ,
.
int cpu = get_cpu();
/* , ... */
/* , */
put_cpu();


, , ,
(load) (save) , .
,
. ,

201

,
,
( , ,
).
, ,
6 . , ,

, - . ,

. (barrier).
.
a = 1;
b = 2;

, b, ,
.
, b
a.
.
,
a b . - .
,
, , b .
a = 1;
b = a;

,
a b. , , . ,
"" ,
.
,
.
rmb () (read memory barrier).
, ,
rmb (), , . , , , , ,
.

Intel 86 , .. . -,

202

wmb () (write barrier).


, rmb (), ,
, , , .
rnb () .
, rab {) , . , ( , rmb ()), .
rmb() - read_barrier_depends() -
, , ,

. , ,
, , . ?
, , rmb (),
, .
read_barrier_depends () , rmb (),
read_barrier_depends () n ( ).
mb () rmb (). 1, b 2 1
a=3;
mb();
b=4;

2
c=b;
rmb();
d=a;

,
, b, d . , 4 ( ), d 1 ( ).
mb () , a b , rmb () , b
.
- ,

, .
, b
. rmb () wmb () ,
, .
, read_
barrier_depends () rmb(). 1, b - 2, p - &b.

203

1
a=3;
mb();
p=&a;

pp=p;
read_barrier_depends();
b=*pp;

,
b * , . r e a d _ b a r r i e r _ d e p e n d s ()
, * . rmb (), ,
r e a d _ b a r r i e r _ d e p e n d s (). ,
mb () ,
- 1.
smp_rmb () , smp_wmb (), smp_mb() s m p r e a d _ b a r r i e r _ d e p e n d s ()
. SMP-
, , , . SMP- ,
SMP-.
b a r r i e r ()
,
(.. ).
,
, ,
. , . ,
,
, .
, , .
, , ( ).
,
, - .
. 9.10 , ,
Linux.
, . ,
( Intel x86), wmb ()
.
(.. ),
.

204

9.10.

rmb()

read_barrier_depends()

,
,

wmb()

mb()

smp_rmb()

SMP- rmb() , , , b a r r i e r ( )

smp_read_barrier_depends()

SMP- read_barrier_depends().
, , b a r r i e r ( )

smp_wmb()

SMP- wmb(), , , b a r r i e r ( )

smp_mb()

SMP- mb(), , , b a r r i e r ( )

barrier()

205

10

. , , (time driven),
, 1 (event driven).
, , ,
. , 100 .
, -,
. ,
, 500 . ,
(uptime), .
. 5
, (,
).
, . .
, , , . ,
, 10 , , .
, . , (timer interrupt),
.
Linux, .

, , , . . , , , ,
.

, (dynamic timers)
, , , . ,
, , .
.
, ,
.


. , , . ,
. ,
. ( hitting, popping) . , (tick rate).
, , .
,
. (tick) , .

, (wall
time) (uptime).
, .
, .
,
. , . "" ""
.

. . .
(uptime).
(time of day).
SMP-
, , ( 4, " ").

208

10

, ,
, (
4).
,
.

.
, ..
. , . , , . " "
.

: HZ
( , tick rate)
Z,
. HZ . ,
HZ .
<asm/param.h>.
HZ, 1/HZ. ,
include/asm-i386/param.h i386
.
#define HZ 1000 /* internal kernel time frequency */

i386
1000 , .. 1000 (
).
100 . . 10.1 .
, HZ
. ,

. Alpha ,
100 ,
, 100 , HZ. HZ
.
. , . .
, .

209

10.1.

alpha
arm
cris
h8300
i386
ia64
m68k
m68knommu
mips
mips64
parisc
ppc
ppc64
s390
sh
sparc
sparc64
um
v850
x86-64

( )

1024
100
100
100
1000
32 10242
100
50, 100 1000
100
100
100 1000
100
1000
100
100
100
100
100
24, 100 122
1000

HZ
i386,
Linux, 100 .
2.5 1000 ,
( ) .
,
. , , HZ
.
, . , ,
, . .
,
, , ,
.
.
2

IA-64 32 . IA-64 1024 .

210

10

, . , 100 10 . , ,
, 10 , 3 . , 1000 , 1 , .. 10
. ,
1 , 100
, 10 .
. ,
,
, ,
, . , 100
+/- 5 . 5 . 1000
0.5 .
.

( , ).
, p o l l () s e l e c t (), (timeout) , .
, , .
.
p o l l () s e l e c t (). .
, ,
, ,
. ,
(.. ) .
,
.
4, 3

, . ( ) . () , .

211

. ,
need_resched, . , , 2 . ,
2
. ,
, . 1/HZ
! , HZ=100, 10 . , ,
, . - ,
. , , - , , , , .
1000
1 , 0.5 .
, , , 1000 ( ).
.
, .
, . ,
, (thrashing)
(.. , ).
, , . ,
HZ=100 HZ=1000 10 ,
. ? "" 10,
"". , , HZ=1000
. 2.6
HZ4.

, ? , ? , ,
.
, .
. , ,
, .
4

NTP, HZ
. 86 100, 500 1000 .

212

10

,
.
HZ.
.
, ,
.
. , , . .
, Linux . ,
. , -.

j i f f i e s
j i f f i e s
, .

. HZ , j i f f i e s
HZ. (uptime) j i f f i e s / H Z .
jiffy
jiffy (, ) . , "in a
jiffy" ( ) . jiffy [)
, .
jiffy ( 10 ms).
, , (, , , , ).
jiffy
, . jiffy
. jiffy 1/60 .
, Unix, jiffy .
100 ms. , jiffy Linux .

j i f f i e s < l i n u x / j i f f i e s . h > .
extern unsigned long volatile jiffies;
, .
. j i f f i e s .
( * HZ)

213

, j i f f i e s
, .
(jiffies / HZ)

. , .
unsigned long time_stamp = jiffies;
/* */
unsigned long next_tick = jiffies + 1; /*
*/
unsigned long later = jiffies + 5*HZ;
/*
*/


, .
, j i f f i e s unsigned long - .

j i f f i e s
j i f f i e s
unsigned long , , 32 32-
64 64-. 32-
j i f f i e s 100 ,
497 , . HZ 1000 47.9 ! 64- j i f f i e s ,
-
HZ .
, j i f f i e s unsigned long.
.
, jiffies
unsigned long.
extern unsigned long volatile jiffies;

< l i n u x / j i f f i e s . h >
.
extern u64 jiffies_64;

ld (1), ( 86 a r c h / i 3 8 6 / k e r n e l /
vmlinux.lds.S), , j i f f i e s
jiffies_64.
jiffies = jiffies_64;

, j i f f i e s 32
64- jiffies_64.

214

10

j i f f i e s ,
32 .
64- , -.
, ( , ). , , 64
, 64- .
. 10.1 j i f f i e s j i f f i e s _ 6 4 .
j i f f i e s _ 6 4 ( j i f f i e s 64- )

j i f f i e s 32-

. 10.1. jiffies jiffies_64


, j i f f i e s , j i f f i e s _ 6 4 . g e t _ j i f f i e s _ 6 4 ()
64- 5 . ,
32 j i f f i e s .
64- j i f f i e s _ 6 4 j i f f i e s
. j i f f i e s , g e t _ j i f f i e s _ 6 4 ( ) , .

j i f f i e s
j i f f i e s ,
, . 32- 2 3 2 -1.
, 4294967295 .
1, .
.
unsigned long timeout = j i f f i e s + HZ/2; /*
0.5 */
5

, 32- 64- , ,
,
xtime_lock.

215

/* ,
. . . */
if (timeout < jiffies) {
/* ... */
} else {
/* ... */
}

, .
, . , , .
, . . , j i f f i e s ,
timeout?
, j i f f i e s ,
timeout, .
j i f f i e s , timeout. ,
, , ,
. - if !
,
, . <linux/jiffies.h> .
/*
 * Wraparound-safe comparisons of two tick counts.
 *
 * Each macro casts both operands to (signed) long and tests the sign of
 * their difference, so the result stays correct when the unsigned counter
 * has wrapped between the two samples, as long as the two values are less
 * than half the counter range apart.
 *
 * unknown: usually the current value of jiffies
 * known:   the value it is compared against (e.g. a computed timeout)
 */
/* true if 'unknown' is after 'known' */
#define time_after(unknown, known) ((long)(known) - (long)(unknown) < 0)
/* true if 'unknown' is before 'known' */
#define time_before(unknown, known) ((long)(unknown) - (long)(known) < 0)
/* true if 'unknown' is after or equal to 'known' */
#define time_after_eq(unknown, known) ((long)(unknown) - (long)(known) >= 0)
/* true if 'unknown' is before or equal to 'known' */
#define time_before_eq(unknown, known) ((long)(known) - (long)(unknown) >= 0)

unknown j i f f i e s ,
known , .
time_after (unknown, known) true,
unknown known,
false. time_before (unknown, known) t r u e , unknown ,
known, false.
, ,
"", .
, , , .
unsigned long timeout = jiffies + HZ/2; /*
0.5 */

216

10

/* ,
... */
if (time_after(jiffies, timeout)) {
/* ... */
} else {
/* ... */
}
, , , .
, , ,
.

HZ
Z . , , , ,
.
, , HZ . ,
HZ , ,
. , ,
!
20 ,
.
,
j i f f i e s , . USER_HZ, HZ,
.
86 HZ 100, USER_
HZ=100. j i f f i e s _ t o _ c l o c k _ t ( )
, HZ,
, USER_HZ. , HZ USER_HZ . , .
/*
 * Scale a tick count x from HZ units to USER_HZ units.
 * The divisor (HZ / USER_HZ) is an integer division, so this form is
 * exact only when HZ is an integer multiple of USER_HZ.
 */
#define jiffies_to_clock_t(x) ((x) / (HZ / USER_HZ))
, .
j i f f i e s _ 6 4 _ t o _ c l o c k _ t () 64- j i f f i e s HZ USER_HZ.
, ,
, , .
unsigned long start = jiffies;
unsigned long total_time;
/* ... */
total_time = jiffies - start;
printk("TO %lu \n", jiffies_to_clock_t(total_time));

217

, , HZ=USER_HZ.
, .
, :
, .
printk(" %lu \n", total_time / HZ);


,
, ,
, .
, .


(real-time clock, RTC) . RTC , , , . PC
RTC - BIOS.
RTC BIOS.
RTC
,
xtime. , , 8,
RTC. , ,
xtime.


. , ,
, , .
, . (decrementer), , , ,
, . , .
.
86 (programmable interval timer, PIT). PIT
218

10

PC. Co DOS . PIT ,


HZ. , ,
, .
86 APIC (Advanced Programmable Interrupt Controller,
) (TSC, Time Stamp Counter).


, , j i f f i e s HZ, ,
. : ,
, .
, , , . , , , .
xtime_lock, jiffies_64 xtirne.
, .
.
- do_timer ().
- do_timer ()
.
jiffies_64 (
32- ,
xtime_lock ).
, , , .
,
( ).
scheduler_tick () , 4.
,
xtime.

(load average).

219

,
.
/*
 * do_timer - per-tick bookkeeping routine shown in this listing.
 * regs: saved register state; only used here as the argument of user_mode().
 *
 * Increments the 64-bit tick counter jiffies_64, hands the result of
 * user_mode(regs) to update_process_times(), then calls update_times().
 */
void do_timer(struct pt_regs *regs)
{
/* count this tick */
jiffies_64++;
/* user_mode(regs) becomes the user_tick argument of update_process_times() */
update_process_times(user_mode(regs));
update_times();
}
user_mode () , r e g s ,
1, , 0 . u p d a t e _
p r o c e s s _ t i m e s ,
.
/*
 * update_process_times - per-tick work for the currently running task.
 * user_tick: value forwarded by the caller; do_timer() passes user_mode(regs).
 *            NOTE(review): presumably 1 for a tick taken in user mode and
 *            0 otherwise - confirm against user_mode().
 *
 * Calls update_one_process(), run_local_timers() and scheduler_tick(),
 * in that order.
 */
void update_process_times(int user_tick)
{
struct task_struct *p = current;
int cpu = smp_processor_id();
/* XOR with 1: when user_tick is 0 or 1, system is its complement */
int system = user_tick ^ 1;
update_one_process (p, user_tick, system, cpu);
run_local_timers() ;
scheduler_tick(user_tick, system);
}
u p d a t e _ p r o c e s s () . .
, (XOR) , u s e r _ t i c k system , ,
. u p d a t e _ o n e _ p r o c e s s () .
/*
*
*/
p->utime += user;
p->stime += system;
1, .
, , ,
.
. ,
, . ,
, , . .
r u n _ l o c a l _ t i m e r s () , (. 7, " 220

10

"), ,
. , "".
, s c h e d u l e _ t i c k ()
need_resched . SMP-
. 4.
u p d a t e _ p r o c e s s _ t i m e s ()
update_times (), .
/*
 * update_times - bring wall_jiffies up to date with jiffies.
 *
 * Computes how many ticks jiffies is ahead of wall_jiffies; when that is
 * non-zero, advances wall_jiffies by the same amount and passes the count
 * to update_wall_time(). last_time_offset is then reset to 0 and
 * calc_load() is called with the tick count (also when the count is 0).
 */
void update_times(void)
{
unsigned long ticks;
/* ticks elapsed since wall_jiffies was last advanced */
ticks = jiffies - wall_jiffies;
if (ticks) {
wall_jiffies += ticks;
update_wall_time(ticks);
}
last_time_offset = 0;
calc_load(ticks);
}

t i c k s
. , , 1.
, ,
. , .
. w a l l _ j i f f i e s t i c k s , j i f f i e s
. update_
wall_time () , xtime, . calc_load ()
, ,
update_times () .
do_timer () - , ,
xtirae_lock .
1/HZ , .. 1000
PC.


(time of day, wall time, ) kernel/timer. .
struct timespec xtime;

221

timespec <linux/time.h>
.
/*
 * struct timespec - a time value split into whole seconds plus a
 * nanosecond remainder; this is the type of the xtime variable above.
 */
struct timespec {
time_t tv_sec; /* seconds */
long tv_nsec; /* nanoseconds */
};
x t i m e . t v _ s e c , 1
1970 (UTC, Universal Coordinated Time, ).
epoch ( ). Unix- . xtime.tv_nsec , .
xtime xtime_lock.
-, ,
9, " ".
xtime
.
write_seqlock(&xtime_lock);
/* xtime ... */
write_sequnlock(&xtime_lock);
xtime read_
seqbegin() r e a d _ s e q r e t r y () .
do {
unsigned long lost;
seq = read_seqbegin(&xtime_lock);
usec = timer->get_offset();
lost = jiffies - wall_jiffies;
if (lost)
usec += lost * (1000000 / HZ);
sec = xtime.tv_sec;
usec += (xtime.tv_nsec / 1000);
} while (read_seqretry(&xtime_lock, seq));
, ,
.
xtime ,

.
g e t t i m e o f d a y () ,
sys_gettimeofday() .
asmlinkage long sys_gettimeofday(struct timeval *tv, struct timezone *tz)
{

222

10

if (likely(tv !=NULL)) {
struct timeval ktv;
do_gettimeofday(&ktv);
if (copy_to_user(tv, &ktv, sizeof(ktv)))
return -EFAULT;
}
if (unlikely(tz !=NULL)) {
if (copy_to_user(tz, &sys_tz, sizeof(sys_tz)))
return -EFAULT;
}
return 0;
}

tv,
- do_gettimeofday() .
xtime,
. , tz ,
(time zone), . sys_tz.

, -EFAULT. .
t i m e ( ) 6 ,
gettimeofday() .
, ,
ftime() ctirae().
settimeofday()
. , CAP_SYS_TIME.
xtime,
, .
, .

(timers), , , ,
, .
.
"".
, -

. , .
, , .

sys_time () ,
gettimeofday ().

223

. , , , , , . ,
. . , . ,
7. , . .


timer l i s t ,
<linux/timer.h> .
/*
 * struct timer_list - descriptor of one dynamic kernel timer.
 *
 * NOTE(review): the field comments were emptied by the text extraction and
 * are restored here from the 2.6 <linux/timer.h> definition - verify.
 */
struct timer_list {
	struct list_head entry;           /* timers are part of a linked list */
	unsigned long expires;            /* expiration value, in jiffies */
	spinlock_t lock;                  /* lock protecting this timer */
	void (*function)(unsigned long);  /* the timer handler function */
	unsigned long data;               /* lone argument to the handler */
	struct tvec_t_base_s *base;       /* internal timer field, do not touch! */
};
,
. ,
,
. , . < l i n u x / t i m e r . h > .
kernel/timer..
.
struct timer_list my_timer;
,
.
, .
init_timer(&my_timer);
, , .
my_timer.expires = jiffies + delay; /*
delay */

, ( 2.3) . , .
- .

224

10

my_timer.data = 0; /* - ,
*/
my_timer.function = my_function; /* , ,
*/
m y _ t i m e r . e x p i r e s ( ).
j i f f i e s
my_timer . e x p i r e s , - m y _ t i m e r . f u n c t i o n
m y _ t i m e r . d a t a . t i m e r _ l i s t , - .
void my_timer_function(unsigned long data);
d a t a . , ( )
.
.
add_timer(&my_timer);
!
e x p i r e d . ,
, ,
. ,
, . , , . ,
.
,
. mod_timer (), .
i

mod_timer(&my_timer, jiffies + new_delay); /*


*/
mod_timer () , , . , m o d _ t i m e r ( )
. 0, , 1, .
mod_timer () ,
.
, d e l _ t i m e r () .
del_timer(&my_timer);
, . , 0, 1. ,

225

, , .
. del_timer () ,
, (.. ).

. , , , del_timer_sync () :
del_timer_sync (&my_timer);

del_timer(), del_timer_sync() .

,
,
. -, ,
inod_timer ().
del_timer (my_timer) ;
my_timer->expires = jiffies + new_delay;
add_timer(my_timer);

-,
del_timer _sync (), del_timer (). , .
, - , . .
, , - . .
, 8 9.



. update_process_times (), run_local_timers () , .
/*
 * run_local_timers - raise the timer softirq.
 *
 * Does nothing but call raise_softirq(TIMER_SOFTIRQ); it is called from
 * update_process_times().
 */
void run_local_timers(void)
{
raise_softirq(TIMER_SOFTIRQ);
}

TIMER_SOFTIRQ
run_tirner_softirq (). , ( ).
226

10

. , ,
.
. 5 . ,
.
, , ,
, .
, .


( )
. , . . ,
Ethernet-,
2 , ..
, .
, . .
, , . , ,
8 .


( )
(busy
loop, busy waiting). , .
,
, .
unsigned long delay = jiffies + 10; /* */
while (time_before (jiffies, delay));

, ,
. , ,
, , , ,
. .

227

, j i f f i e s ,
delay, ,
10 . 86 HZ, 1000, 10 .
.
unsigned long delay = jiffies + 2*HZ; /* */
while (time_before(jiffies, delay));
, 2*2 , 2 , .
. ,

! "" , ,
. - .
,
, :
unsigned long delay = jiffies + 5*HZ;
while (time_before(jiffies, delay))
cond_resched();
c o n d _ r e s c h e d ( ) ,
, n e e d _ r e s c h e d . , , , , . , .
,
, .
,
( !). , , , .
, , ? .
, j i f f i e s .
, j i f f i e s
, , . < l i n u x /
j i f f i e s . h > v o l a t i l e . v o l a t i l e , ,
, , . , , .

228

10


( ) (, ), .
,
,
.
j i f f i e s , .
, 100 , 10 !
1000 , . , , .
, < l i n u x / d e l a y . h >
j i f f i e s .
void udelay(unsigned long usecs);
void mdelay(unsigned long msecs);

.
. , 1000
, 1000000 .
.
udelay(150); /* delay for 150 microseconds */
u d e l a y () , ,
. mdelay ()
u d e l a y ( ) . ,
(
BogoMlPS), u d e l a y ()
, .
BogoMIPS , !
BogoMlPS . BogoMlPS
u d e l a y ( ) m d e l a y ( ) .
bogus () MIPS (million of instructions per second,
). ,
( Pentium III 1 ).
Detected 1004.932 MHz processor.
Calibrating delay loop...

1990.65 BogoMlPS

BogoMIPS - ,
, ,
! l o o p s _ p e r _
j i f f y , / p r o c / c p u i n f o .

, , , (
), , .
1 o o p s _ p e r _ j i f f
c a l i b r a t e _ d e l a y ( ) , i n i t / m a i n . c .
u d e l a y ( ) , . :
u d e l a y ( ) , .
m d e l a y ( ) . , , ( m d e l a y ( ) ,
) ,
. , , ,
. , .

, .

schedule_timeout()

schedule_timeouit ( ) . (sleep) , .
, ,
, . , (wake up) .
.
/* */
set_current_state(TASK_INTERRUPTIBLE);

/* s */
schedule_timeout(s * HZ);
, .
, s .
TASK_INTERRUPTIBLE, , .
, , TASK_
UNINTERRUPTIBLE. s c h e d u l e _ t i r n e o u t ( )
, .
, s c h e d u l e _ t i r a e o u t ( ) , , , . ,

230

10

, 8 9. , .
s c h e d u l e _ t i r a e o u t ( ) . . .
/*
 * schedule_timeout - call schedule() with a timer armed to wake the caller.
 * timeout: relative timeout in ticks (added to jiffies), or
 *          MAX_SCHEDULE_TIMEOUT to call schedule() without arming a timer.
 *
 * A negative timeout is reported with printk(), the task state is set back
 * to TASK_RUNNING and 0 is returned. Otherwise a timer on the stack is set
 * up so that process_timeout() runs with the current task as its argument,
 * schedule() is called, and afterwards the timer is removed with
 * del_timer_sync().
 *
 * Returns expire - jiffies as computed after schedule() returns, clamped to
 * 0 if negative; for MAX_SCHEDULE_TIMEOUT the argument is returned as is.
 *
 * The usage example in this text sets the task state with
 * set_current_state() before calling this function.
 */
signed long schedule_timeout(signed long timeout)
{
	timer_t timer;
	unsigned long expire;

	switch (timeout) {
	case MAX_SCHEDULE_TIMEOUT:
		/* no timer at all: just schedule */
		schedule();
		goto out;
	default:
		if (timeout < 0) {
			printk(KERN_ERR "schedule_timeout: wrong timeout "
			       "value %lx from %p\n", timeout,
			       __builtin_return_address(0));
			current->state = TASK_RUNNING;
			goto out;
		}
	}

	expire = timeout + jiffies;

	init_timer(&timer);
	timer.expires = expire;
	timer.data = (unsigned long) current;
	timer.function = process_timeout;

	add_timer(&timer);
	schedule();
	/* the timer lives on this stack frame, so remove it before returning */
	del_timer_sync(&timer);

	timeout = expire - jiffies;

out:
	return timeout < 0 ? 0 : timeout;
}

timer timeout .
process timeout () , , . ,
schedule (). , TASK_INTERRUPTIBLE TASK_UNINTERRUPTIBLE, , .
, process_
timeout (), .

231

void process_timeout(unsigned long data)


{
wake_up_process((task t *) data);
}
TASK_RUNNING
.
,
s c h e d u l e _ t i r a e o u t () ( s c h e d u l e ()). , .
-.
s w i t c h () . MAX_SCHEDULE_TIMEOUT .
(
), . , ,
, !

wait queue
4 ,
, ,
, . -
, wake_up ()
, . .
, , ,
s c h e d u l e _ t i m e o u t ()
s c h e d u l e () , .
,
. , , , .


,
,
. ,
. , , HZ j i f f i e s .
, .
, .
, , ,
. , ,
. .
232

10

11

, . . , ,
, ,
, . ,
. , , . - - ,
,
, . , ,
,
-.
,
. , , , .


. ,
, , (MMU, Memory
Management Unit) ,
. MMU ( ). ,
.
19, "",
.
. 32- , 4 , 64- 8 . , , 4 , , 1 ,
262 144 .


struct page. <linux/mm.h> .
struct page {
page_flags_t
atomic_t
atornic_t
unsigned long
struct address_space
pgoff_t
struct list_head
void
};

flags;
_count;
_mapcount;
private;
*mapping;
index;
lru;
*virtual;

. flags . : (dirty) (locked) .


, 32 .
<linux/page-flags.h>.
_count ..
. , ,
, . ,
page_count () , page . _count (
), page_count ()
,
.
( mapping address_space,
), ( private) .
v i r t u a l .
. (
, high memory)
(.. ).
NULL .
.
, , ,
page , . ,
, , . , ,
, ,
, - (swapping) .
,
, .

234

11

, ,
.
, , (.. ). , ,
. : , ,
, (page cache) ..
,
. : " !" , ( ) . s t r u c t page 40 . ,
1 , 128 .
1 .

,
. ,
, .
- .
, . , Linux , .

(, DMA, Direct Memory Access) .

, . ,
.
, Linux
.
ZONE_DMA. , DMA.
ZONE_NORMAL. ,
.
ZONE_HIGHMEM. " ", ,
.
<linux/mmzone.h>.
, , . , . ZONE_DMA
, ZONE_NORMAL.

235

86,
1
ISA DMA 32- , ISA 16
. , ZONE_DMA 8
0-16 .
ZONE_HIGHMEM. To,
,
. 86 ZONE_
HIGHMEM , 896 .
ZONE_HIGHMEM ,
. , ZONE_HIGHMEM,
2
(high memory). (low memory).

ZONE_NORMAL ,
. 86, , ZONE_NORMAL
16 896 . ,
, ZONE_NORMAL . . 11.1
86.
11.1. 86

ZONE_DMA

< 16

ZONE_NORMAL

16 - 896

ZONE_HIGHMEM

> 896

,
. ,
ZONE_DMA , DMA. ,
ZONE_DMA. ,
, ; .

, . ,
ZONE DMA,
ZONE_NORMAL,
ZONE_DMA. ,
ZONE_NORMAL,
ZONE_DMA , ,
( ),
.

PCI 24- . .
2

DOS.

236

11

s t r u c t zone, <linux/mmzone.h> .
struct zone {
spinlock t
unsigned ]ong
unsigned long
unsigned long
unsigned long
unsigned long
spinlock_t
struct list_head
struct list_head
unsigned long
unsigned long
unsigned long
unsigned long
int
unsigned long
int
int
struct free_area
wait_queue_head_t
unsigned long
unsigned long
struct per_cpu_pageset
struct pglist_data
struct page
unsigned long
char
unsigned long
unsigned long
};

lock;
free_pages;
pages_min;
pages_low;
pages_high;
protection[MAX_NR_ZONES];
lru_lock;
active_list;
inactive_list;
nr_scan_active;
nr_scan_inactive;
nr_active;
nr_inactive;
all_unreclaimable;
pages_scanned;
temp_priority;
prev_priority;
free_area[MAX_ORDER];
*wait_table;
wait_table_size;
wait_table_bits;
pageset[NR_CPUS];
*zone_pgdat;
*zone_mem_map;
zone_start_pfn;
*name;
spanned_pages;
prcsent_pages;

,
. .
lock -, . , , ,
. ,
, .
free_pages .
pages_min ,
(, ).
name , , ( ). , mm/page_alloc..
"DMA", "Normal" "HighMem".

237


, ,
, , ,
, . . ,
, < l i n u x / g f p . h > . .
struct page * alloc_pages(unsigned int gfp_mask, unsigned int order)
2 o r d e r (.. 1 << o r d e r )
( ) page, . NULL. gfp_mask
.
.
void * page_address(struct page *page)
, . s t r u c t page, .
unsigned long __get_free_pages(unsigned int gfp_mask, unsigned int
order)
, a l l o c _ p a g e s ( ) ,
, . ,
.
, -,
.
struct page * alloc_page(unsigned int gfp_mask)
unsigned long __get_free_page(unsigned int gfp_mask)
, , 0
o r d e r (2 = ).


, .
unsigned long get_zeroed_page(unsigned int gfp_mask)
_ _ g e t _ f r e e _ p a g e () , ,
.
, , -

238

11

"", ,
(, ) .
-
, ,
. . 11.2 .
11.2.

a l l o c _ p a g e (gfp_mask)

page
2 o r d e r page

2 o r d e r

alloc_pages (gfp_mask, order)


_ _ g e t _ f r e e _ p a g e (gfp_mask)
__get_free_pages (gfp_mask, order)
g e t _ z e r o e d _ p a g e (gfp_mask)


, , .
void __free_pages(struct page *page, unsigned int order)
void free_pages(unsigned long addr, unsigned int order)
void free_page(unsigned long addr)
, . page, addr
o r d e r . , .
, , . . 8 .
page = __get_free_pages(GFP_KERNEL, 3 ) ;
if (!page) {
/* : ! */
return -ENOMEM;
}
/* 'page' */
free_pages(page, 3);
/*
*
* , 'page'
*/

239

GFP_KERNEL, ,
gfp_mask, .
__get_free_
pages () . ,
.
, , .
, ,
.
.
,
, , .
, , kmalloc().

kmalloc ()
kmalloc () mall () , , flags.
kmalloc () . , ,
. kmalloc () .
<linux/slab.h> .
void * kmalloc(size_t size, int flags)

,
size 3. . NULL.
, .
kmalloc () NULL .
. ,
, dog.
struct dog *ptr;
ptr = kmalloc (sizeof (struct dog), GFP_KERNEL);
if (!ptr)
/* ... */
3

, , ,
!
, , . , .
v, , NULL.

240

11

kmalloc () , p t r
, . GFP_KERNEL ,
kmalloc ().

gfp_mask
, , , , kmalloc {) .
.
: ,
. , .
. ,
, ( ),
, . , ,
. , .

, . , . GFP_KERNEL , , .
.


, ,
<linux/gfp.h>. <linux/slab.h> , .
, .
. . 11.3 .
,
.
ptr = kmalloc(size, __GFP_WAIT | __GFP_IO | __GFP_FS);

( alloc_pages ()), , -
, . ,
, .
,
, . , , !

241

1 1 . 3 .

__GFP_WAIT

__GFP_HIGH

__GFP_IO

__GFP_FS

__GFP_COLD

,
(cache cold)

__GFP_NOWARN
__GFP_REPEAT

__GFP_NOFAIL

__GFP_NORETRY

__GFP_NO_GROW
__GFP_COMP

(compound) .
(hugetlb)

(slab layer)


, . .
ZONE_NORMAL, , , ,
.
, , ZONE_NORMAL
( ), .
. 11.4 .
11.4.

__GFP_DMA

ZONE_ DMA

__GFP_HIGHMEM

ZONE_HIGHMEM ZONE_NORMAL

, . __GFP_DMA ,
ZONE_DMA.
: " ,
". __GFP_HIGHMEM, , , ZONE_NORMAL ZONE_HIGHMEM ( ). : " ,
, , , , ".

242

11

,
ZONE_NORMAL ZONE_DMA, ZONE_NORMAL.
__GFP_HIGHMEM __get_free_pages ()
k m a l l o c (> . , ,
page, , ,
. a l l o c _ p a g e a () . , ,
ZONE_NORMAL.


,
. , . .
. 11.5 , . 11.6 ,
.
11.5.

GFP_ATOMIC


.
, ,

GFP_NOIO

,
-.
-,
-

GFP_NOFS

-, , .
,

GFP_KERNEL

, .
, ,

GFP_USER

, .

GFP_HIGHUSER ZONE_HIGHMEM, .
GFP_DMA

ZONE_DMA. , ,

243

11.6. ,

GFP_ATOMIC

__GFP_HIGH

GFP_NOIO

__GFP_WAIT

GFP_NOFS

(__GFP_WAIT | __GFP_IO)

GFP_KERNEL

(__GFP_WAIT | __GFP_IO | __GFP_FS)

GFP_USER

(__GFP_WAIT | __GFP_IO | __GFP_FS)

GFP_HIGHUSER

(__GFP_WAIT | __GFP_IO | __GFP_FS | __GFP_HIGHMEM)

GFP_DMA

__GFP_DMA

.
GFP_KERNEL.
. , , (..
..). ,
,
.
, GFP_ATOMIC . ,
, , . , , , ,
. GFP_KERNEL, ,
, (swap
out), (flush
dirty pages) .. GFP_ATOMIC
,
( , ).
GFP_ATOMIC ,
, .
GFP_NOIO
GFP_NOFS. , ,
, .
GFP_NOIO -. , GFP_NOFS
-,
. ? -
. ,
GFP_NOFS.
, -

244

11

,

! , ,
,
, , .
,
.
GFP_DMA , ZONE_DMA. ,
.
GFP_ATOMIC GFP_KERNEL.

GFP_ATOMIC GFP_KERNEL. . 11.7
. , .
11.7.

,
,

GFP_KERNEL
G F P _ A T O M I C G F P _ K E R N E L
,

G F P _ A T O M I C

G F P _ A T O M I C



,
(GFP_DMA | G F P _ K E R N E L )

(GFP_DMA | GFP_ATOMIC)
,

kfree()
kmalloc () k f r e e (),
< l i n u x / s l a b . h > .
void kfree(const void *ptr)
k f r e e () ,
kmalloc ( ) . , kmalloc () ,
, ,
. , , . ,
k f r e e (NULL) .

245

.
. . BUF_SIZE,
, , , ,
.
char *buf;
buf = kmalloc(BUF_SIZE, GFP_ATOMIC);
if (!buf)
/* ! */
, ,
kfree(buf);

vmalloc ()
v m a l l o c () kmalloc ( ) ,
, ,
.
: , malloc ( ) , , , . k m a l l o c () , ,
( ) . vmalloc ()
, .

"" ,
.
, . "" .
, , ,
,
. , , , ,
, .
. ,
.

,
k m a l l o c () , v m a l l o c () . , ,
. , v m a l l o c ()
. , , v m a l l o c () , , (

246

11

). TLB4, ,
. vmalloc ()
, ,
. , , ,
vmalloc ().
v m a l l o c () < l i n u x / v m a l l o c . h >
mm/vmalloc.. malloc ()
.
void * vmalloc(unsigned long size)
s i z e . NULL. ,
.
, vmalloc () ,
void vfree(void *addr)
, a d d r
v m a l l o c ( ) .
. .
. , .
char *buf;
buf = vmalloc (16 * PAGE_SIZE); /* 16 */
if (!buf)
/* ! */
/*
* buf
* , , 16*PAGE_SIZE ,
*
*/
, , .
vfree ( b u f ) ;

TLB (translation lookaside buffer) ,


. , .

247


, .
,
(free list).
. , ,
. , ,
, .
, , .
, ,
, .
, , , .
.
(slab layer), (slab allocator). .
SunOS 5.4 Sun Microsystems'.
Linux
.
.
, ,
, .
( ). ,
.
, .

,
.
, ,
,
.
5

Bonwick J. "The Slab Allocator: An Object-Caching Kernel Memory


Allocator," USENIX, 1994.

248

11

, (..
),
SMP-.

(Non-Uniform Memory Access NUMA),
(node), .
"',
.
Linux
.


, (cache). . . ,
(
s t r u c t t a s k _ s t r u c t ) , ( s t r u c t inode).
, krnalloc () .
( slab , ).
. .
.
, . : (full), (partial) (empty).
( ).
. , , .
. ,
. , ,
.
.
inode, (. 12).
, . s t r u c t inode inode_
cachep ( ).
, , .
s t r u c t inode. s t r u c t inode, ,
, , . inode,

249

. . 11.1 ,
.

. 11.1. ,
kmem_cache_s.
s l a b _ f u l l , s l a b _ p a r t i a l slab_empty,
kmem_list3. , .
s t r u c t s l a b , .
struct slab {
struct list head list; /* ,
*/
unsigned long colouroff; /* */
void *s_mem;
/* */
unsigned int inuse;
/* */
kmem_bufctl_t free;
/* , */
};
, , . , , , .
, _ _ g e t _ f r e e _ p a g e s () .
static void *kmem_getpages(kmem_cache_t *cachep, int flags, int nodeid)
{
struct page *page;
void *addr;
int i;

250

11

flags |= cachep->gfpflags;
if (likely(nodeid == -1)) {
addr = (void*)__get_free_pages(flags, cachep->gfporder);
if (!addr)
return NULL;
page = virt_to_page (addr) ;
} else {
page = alloc_pages_node(nodeid, flags, cachep->gfporder);
if (!page)
return NULL;
addr = page_address(page);
}
i = (1 << cachep->gfporder);
if (cachep->flags & SLAB_RECLAIM_ACCOUNT)
atomic_add(i, &slab_reclaim_pages);
add_page_state(nr_slab, i);
while (i--) {
SetPageSlab(page);
page++;
}
return addr;
}
,
. , _ _ g e t _ f r e e _ p a g e s ( ) . ,

. ,
f l a g s . cachep->gfporder. ,
, NUMA-
(Non-Uniform Memory Access, ). n o d e i d - 1 ,
, .
NUMA-.
.
, NUMA-,
kraem_getpages () .
/*
 * kmem_getpages - obtain pages for a cache (variant without a node id).
 * cachep: cache whose gfpflags and gfporder are used.
 * flags:  caller's allocation flags; the cache's own gfpflags are OR-ed in.
 *
 * Returns whatever __get_free_pages() returns, cast to void *.
 * There is no separate error check here; the caller has to test the result.
 */
static inline void * kmem_getpages(kmem_cache_t *cachep, unsigned long flags)
{
void *addr;
/* merge the cache's flags into the request */
flags |= cachep->gfpflags;
/* the order argument is taken from cachep->gfporder */
addr = (void*) __get_free_pages(flags, cachep->gfporder);
return addr;
}

251

kmem_freepages (), f r e e _ p a g e s () . ,

.
,
.
, .
,
.
, . .
, ,
.


.
kmem_cache_t * kmem_cache_create (const char *name, size_t size,
size_t offset, unsigned long flags,
void (*ctor) (void*, kmem_cache_t *,unsigned long),
void (*dtor) (void*, kmem_cache_t *,unsigned long))
, .
. . , . , ,
. f l a g s , . ,
,
, .
SLAB_NO_REAP , " " (.. , ) . , ,
.
SLAB_HWCACHE_ALIGN
,
. "
",
, . ,
, , .

252

11

,
. , , , ,
, .
SLAB_MUST_HWCACHE_ALIGN. ,

. ,
. , . ( ) .
, ,
, .
SLAB_POISON (a5a5a5a5). "" (poisoning)
.
SLAB_RED_ZONE " " (red zone)
.
SLAB_PANIC , . ,
, , ,
VMA ( , . 14, " ") .
SLAB_CACHE_DMA ,
, . , ZONE_DMA.
.
c t o r d t o r . ,
. , .
, .
Linux .
NULL.
kmem_cache_create () . NULL.
, .
.
int kmem_cache_destroy(kmem_cache_t *cachep)

253

.
, .
, .
,
.
. , - , ,
?

kmem_cache_destroy ().
.
, .
, .
void * kmem_cache_alloc(kmem_cache_t *cachep, int flags)
, cachep. ,
kmem_getpages (), f l a g s __get_free_
pages (). , . , GFP_KERNEL GFP_ATOMIC.
, , .
void kmem_cache_free(kmem_cache_t *cachep, void *objp)
, objp, .


, t a s k _ s t r u c t ( ).
kernel/fork..
,
t a s k _ s t r u c t :
kmem_cache_t *task_struct_cachep;
, f o r k i n i t (), .
task_struct_cachep = kmem_cache_create("task_struct",
sizeof(struct task_struct),
ARCH_M1N_TASKALIGN,
SLAB_PANIC,
NULL,
NULL);
254

11

" t a s k _ s t r u c t " ,
s t r u c t t a s k _ s t r u c t . , ARCH_MIN_TASKALIGN , , .
L1_CACHE_BYTES,
.
. ,
NULL, SLAB_PANIC. ,
, p a n i c ( ) . ,
NULL, . SLAB_PANIC
, (
- ).
, f o r k ( ) , ( 3, " "). d u p _ t a s k _ s t r u c t () ,
do_fork () .
struct task_struct *tsk;
tsk = kmem_cache_alloc(task struct_cachep, GFP_KERNEL);
if (!tsk)
return NULL;
, ,
, t a s k _ s t r u c t _ c a c h e p .
f r e e _ t a s k _ s t r u c t (), ( t s k ).
kmem_cache_free(task_struct_cachep, tsk);

, t a s k _ s t r u c t _ c a c h e p .
, .
int err;
err = kmem_cache_destroy (task_struct_cachep);
if (err)
/* */
, ? , , "",
, " " .
,
.
!

255


,
, , . ,
,
. , .
,
, . .
8 32- 16 64- .
2.6 ,
. , , : 4 32- 8 64-. . -
. -, , (uptime) . ,
.
( ,
).
.
, . ,
8 . ,
.
, .
.
, .
, .
. .
. , , . ,
4 16 . .
. a l l o c a ( ) .
256

11


.
,
(
, ) . , .
, .
.
, , .
t h r e a d _ i n f o ,
( 3). . .
.
, ,
.


,
. ,
a l l o c _ p a g e s (), __GFP__HIGHMEM .
86 896 , , 86
4 ( 64 6).
. 86 - 3 4 .


page
, .
void *kmap(struct page *page)
, .
page , . ,

Physical Address Extension ( ).


86 36 (64 ) ,
32 .

257

. kmap () ,
.
(
, , ),
,
. .
void kunmap(struct page *page)
, page.


,
, ,
( ). , " ".
. , , , , , .
.
void *kmap_atomic(struct page *page, enum km_type type)
t y p e , < a s m / k m a p _ t y p e s . h > , .
/*
 * Mapping types accepted by kmap_atomic()/kunmap_atomic().
 * KM_TYPE_NR is not a mapping type: being last, it equals the number
 * of types defined above it.
 */
enum km_type {
	KM_BOUNCE_READ,
	KM_SKB_SUNRPC_DATA,
	KM_SKB_DATA_SOFTIRQ,
	KM_USER0,
	KM_USER1,
	KM_BIO_SRC_IRQ,
	KM_BIO_DST_IRQ,
	KM_PTE0,
	KM_PTE1,
	KM_PTE2,
	KM_IRQ0,
	KM_IRQ1,
	KM_SOFTIRQ0,
	KM_SOFTIRQ1,
	KM_TYPE_NR
};

, .

258

11

, , (
).
.
void kunmap_atomic(void *kvaddr, enum km_type type)
.
, ,
,
. ""
kmap_atomic () , kunmap atomic () .
.

,

,
(per-CPU data). , . , ,
. .
. , , 2.4. , 2.6
. ,
unsigned long my_percpu[NR_CPUS];
, .
int cpu;
cpu = get_cpu();

/*
*/

my_percpu[cpu]++;
printk(" cpu=%d %ld\n",
cpu, my_percpu[cpu]);
put_cpu(); /* */
, ,
. ,
, ,
, ,
.
, -
.
.

, cpu , -

259

pa. ( , ,
.)
, my_percpu ,
.
, g e t _ c p u (),
,
. put_cpu () . , smp_processor_id (),
,
,
.

percpu
2.6 , percpu, , . .
per-CPU- .
, , . ,
, .
< l i n u x / p e r c p u . h > . mm/slab. <asm/percpu.h>.

,
,
, ,
.
DEFINE_PER_CPU(type, name);
t y p e name, . , .
DECLARE_PER_CPU(type, name);
g e t _ c p u _ v a r ()
p u t _ c p u _ v a r () . g e t _ c p u _ v a r () 1- (
, 1-value) . ,
p u t _ c p u _ v a r () .

260

get_cpu_var(name)++;   /* increment name on this processor */
put_cpu_var(name);     /* done; pairs with get_cpu_var() (re-enables preemption per the kernel API) */

, .
per_cpu(name, cpu)++;

/* name
*/

per_cpu () ,
.
, , , .
, . !
8, " ", 9, "
".
.
, . , , ( , . d a t a . p e r c p u ) .
, ,
, .


, , , , kmalloc () .
.
<linux/percpu.h> .
void *alloc_percpu(type);   /* a macro */
void *__alloc_percpu(size_t size, size_t align);
void free_percpu(const void *);
a l l o c percpu () (
) .
__alloc_percpu ().
, , ,
.
alloc_percpu () ,
. , .
struct rabid_cheetah = alloc_percpu(struct rabid_cheetah);,
.
/* explicit form: the size and the alignment are passed by hand */
struct rabid_cheetah = __alloc_percpu(sizeof (struct rabid_cheetah),
                                      __alignof__ (struct rabid_cheetah));

261

_ _ a l i g n o f _ _ , gcc,
, ( ,
).
s i z e o f (). , ,
86 4.
__alignof__ (unsigned long)
1- ( , lvalue)
, 1-.
, 1- , ,
.
19, "".
f r e e p e r c p u () ,
.
a l l o c _ p e r c p u () _ _ a l l o c _ p e r c p u () ,
,
.
.
get_cpu_ptr(ptr) ; /* void ,
ptr, */
put_cpu_ptr(ptr); /* ,
*/
g e t _ c p u _ p t r () ,
. ,
p u t _ c p u _ p t r ( ) .
. , , (, ),
, (, , ). .
void *percpu_ptr;
unsigned long *foo;

percpu_ptr = alloc_percpu(unsigned long);
if (!percpu_ptr)
	/* error allocating memory .. */

foo = get_cpu_ptr(percpu_ptr);
/* manipulate foo .. */
put_cpu_ptr(percpu_ptr);
p e r _ c p u _ p t r ( ) ,
.
per_cpu_ptr(ptr, cpu);

262

11

. "" , , , , .

,

, , . -, .
, ,
, . ,
" " -

. , .
.
-, , , , . ,
.
, , .
, (cache
thrashing), . , ,
,
.
, , ,
( ,
). , , ,
. , . ,
, , . ,
, ,
{ ).
- , . ( , ),
.
, , . ,
, .
.

263



,
, kmalloc().
, , ,
. , , ,
GFP_ATOMIC GFP_KERNEL. ,
,
GFP_ATOMIC. ,
. , , , ,
-, GFP_KERNEL.
, ,

.
, a l l o c _ p a g e s ( ) . alloc_pages ()
s t r u c t page, .
,
s t r u c t page. ""
kmap(), .
,
,
vmalloc() (
vmalloc() kmalloc() )
vmalloc() ,
, . ,
.
,
.
(
), , .
, .
,
, .

264

11

12

(Virtual File System), (Virtual File Switch) VFS, ,


. VFS,
, .

,
, . 12.1,


ext3

VFS

CP(1)


ext2

. 12.1. VFS :
(1) e ,
, ext3, ,
ext2


VFS , , open (), read () write (),
. , . ,
, . ,

. (, DOS)
. "" . ,
, Linux, , . Linux
,
.



, , . Linux ,

. ,
VFS ,
. , Unix
( Unix, ). Linux
.
, .
VFS, "
" , " ". . VFS , ..
, , , .
, . ,
, .
266

12

, , , .

, . ,
, .
write(f, &buf, len);

len &buf
, f, .
sys_write (),
,
, f.
, ( , ). . 12.2 ,
write () .
, VFS .

Unix
, Unix , : , (directory entry), (inode) (mount point).
. , . , , (create), (delete) (mount). Unix
1, (namespace). 2.
(file) . , - . , , .
(read), (write), (create)
(delete).

Linux , .. .
,. ,
.
2

, , :. , . ,
.

267

(directory). ,
. . (path).
(directory entry).
"/home/wolfman/foo". "/", home wolfman, a
f , dentry. Unix , .
, ,
.
Unix- ( , , , ..).
(file, metadata), .. , ,
(inode). index node (
), "inode" .
, (superblock). ,
. . .
Unix , .
, , ,
, .. VFS
Linux ,
. Unix- , FAT
NTFS, Linux,
. , ,
, , .
,
VFS .
Unix
Unix VFS. , .

268

12

write()

sys_write()

VFS

, 12.2. ,
wri te (), VFS, , ,

VFS
(VFS) - 3 . .
. , , , .
, ,
.
VFS.
(superblock), .
(inode), .
(dentry),
.
(file), , .
, VFS , .
, dentry ,
. , dentry , , , . ?
operations (). , .

, - . C++
- (),
. VFS , - , - .

269

, .
super_operations ( ) ,
, , , read_inode () sync_fs ().
i n o d e o p e r a t i o n s ( ) , , , ,
c r e a t e d l i n k ().
d e n t r y _ o p e r a t i o n s ( )
, ,
, , d_compare () d_delete ().
f ile_operations ( ) , , , read () wri te ().
, . , .
,
. .
, ,
( C++
Java). ,
, , . , .

VFS
VFS "" ,
, . file_system_type,
. , vfsmount. , , .
, ,
, . f i l e _ s t r u c t , f s _ s t r u c t
namespace.
VFS.

superblock
. ,
. (superblock)
(control block) , ( ). ,

270

12

(, , sysfs), " " .


s t r u c t super_block, <linux/fs.h>. ( ).
/* In-memory superblock object: describes one mounted filesystem. */
struct super_block {
	struct list_head         s_list;            /* list of all superblocks */
	dev_t                    s_dev;             /* identifier */
	unsigned long            s_blocksize;       /* block size in bytes */
	unsigned long            s_old_blocksize;   /* old block size in bytes */
	unsigned char            s_blocksize_bits;  /* block size in bits */
	unsigned char            s_dirt;            /* dirty flag */
	unsigned long long       s_maxbytes;        /* max file size */
	struct file_system_type  *s_type;           /* filesystem type */
	struct super_operations  *s_op;             /* superblock methods */
	struct dquot_operations  *dq_op;            /* quota methods */
	struct quotactl_ops      *s_qcop;           /* quotactl methods */
	struct export_operations *s_export_op;      /* export methods */
	unsigned long            s_flags;           /* mount flags */
	unsigned long            s_magic;           /* filesystem's magic number */
	struct dentry            *s_root;           /* directory mount point */
	struct rw_semaphore      s_umount;          /* unmount semaphore */
	struct semaphore         s_lock;            /* superblock semaphore */
	int                      s_count;           /* superblock reference count */
	int                      s_syncing;         /* filesystem syncing flag */
	int                      s_need_sync_fs;    /* not-yet-synced flag */
	atomic_t                 s_active;          /* active reference count */
	void                     *s_security;       /* security module */
	struct list_head         s_dirty;           /* list of dirty inodes */
	struct list_head         s_io;              /* list of writebacks */
	struct hlist_head        s_anon;            /* anonymous dentries */
	struct list_head         s_files;           /* list of assigned files */
	struct block_device      *s_bdev;           /* associated block device */
	struct list_head         s_instances;       /* instances of this filesystem */
	struct quota_info        s_dquot;           /* quota-specific options */
	char                     s_id[32];          /* text name */
	void                     *s_fs_info;        /* filesystem-specific info */
	struct semaphore         s_vfs_rename_sem;  /* rename semaphore */
};

, f s / s u p e r . . a l l o c _
super (). ,
.

271


s_op, . s t r u c t super_operations,
<linux/fs.h>. .
/*
 * Superblock operations table, reached through sb->s_op.
 * Each member is a pointer to a function that acts on a superblock
 * (or on an inode that belongs to it).
 */
struct super_operations {
	struct inode *(*alloc_inode) (struct super_block *sb);
	void (*destroy_inode) (struct inode *);
	void (*read_inode) (struct inode *);
	void (*dirty_inode) (struct inode *);
	void (*write_inode) (struct inode *, int);
	void (*put_inode) (struct inode *);
	void (*drop_inode) (struct inode *);
	void (*delete_inode) (struct inode *);
	void (*put_super) (struct super_block *);
	void (*write_super) (struct super_block *);
	int (*sync_fs) (struct super_block *, int);
	void (*write_super_lockfs) (struct super_block *);
	void (*unlockfs) (struct super_block *);
	int (*statfs) (struct super_block *, struct statfs *);
	int (*remount_fs) (struct super_block *, int *, char *);
	void (*clear_inode) (struct inode *);
	void (*umount_begin) (struct super_block *);
	int (*show_options) (struct seq_file *, struct vfsmount *);
};

,
.
.
,
,
. , , .
sb->s_op->write_super(sb);

sb . s_op, , ,
write_super (), .
, write_super () , . , -. C++
.
sb.write_super();

, , .
, super_operations.
272

12

s t r u c t inode * a l l o c _ i n o d e ( s t r u c t super_block *sb)


, .
void d e s t r o y _ i n o d e ( s t r u c t inode *inode) .
void read_inode ( s t r u c t inode *inode)
i n o d e - > i _ i n o
.
void d i r t y _ i n o d e ( s t r u c t inode *inode) VFS, (dirty). (, , ext3)
.
void w r i t e _ i n o d e ( s t r u c t inode inode*, i n t wait) . wait ,
.
void put_inode ( s t r u c t inode *inode) .
void drop_inode ( s t r u c t inode *inode) VFS, .
Unix ,
VFS .
inode_lock.
void d e l e t e _ i n o d e ( s t r u c t inode *inode)
.
void put_super ( s t r u c t super_block *sb) VFS , .
void write_super ( s t r u c t super_block *sb) . VFS .

i n t sync_fs ( s t r u c t super_block *sb, i n t wait) . wait


, .

void write_super_lockfs ( s t r u c t super_block *sb) . (LVM, Logical Volume Manager).


void unlockfs ( s t r u c t super_block *sb) write_super_lockf s ().

i n t s t a t f s ( s t r u c t super_block *sb, s t r u c t s t a t f s * s t a t f s ) VFS ,


s t a t f s .

273

i n t remount_fs (struct super_block *sb, i n t *flags, char *data)


VFS,
.
void clear_inode ( s t r u c t inode *)
VFS ,
.
void umount_begin ( s t r u c t super_block *sb)
VFS .
, NFS.
VFS .
.
. NULL.
NULL, VFS , , .

inode
inode , . Unix
inode VFS. , 4.
s t r u c t inode, <linux/f s.h>. , , .
struct inode {
struct hlist_node
struct list_head
struct list_head
unsigned long
atomic_t
umode_t
unsigned int
uid_t
gid_t
kdev_t
loff_t
struct timespec
struct timespec
struct timespec
unsigned int

i_hash;
i_list;
i_dentry;
i_ino;
i_count;
i_mode;
i_nlink;
i_uid;
i_gid;
i_rdev;
i_size;
i_atime;
i_mtime;
i_ctime;
i_blkbits;

/* */
/* */
/* dentry */
/* */
/* */
/* */
/* */
/* - */
/* - */
/* */
/* */
/* */
/* */
/* */
/* */

, ,
. . , .

274

12

unsigned long
unsigned long
unsigned long
unsigned short
spinlock_t
struct rw_semaphore

i_blksize;
i_version;
i_blocks;
i_bytes;
i_lock;
i_alloc_sem

/*
/*
/*
/*
/*
/*

*/
*/
*/
*/
*/

i_sem */
struct semaphore
i_sem;
/* */
struct inode_operations *i_op;
/* */
struct file_operations *i_fop;
/* */
struct super_block
*i_sb;
/* */
struct file_lock
*i_flock;
/* */
struct address_space *i_mapping;
/*
*/
struct address_space i_data;
/* */
struct dquot
*i_dquot[MAXQUOTAS]; /* */
struct list_head
i_devices;
/* */
struct pipe_inode_info *i_pipe;
/* */
struct block_device *i_bdev;
/* */
unsigned long
i_dnotify_mask; /* */
struct dnotify_struct *i_dnotify;
/* */
unsigned long
i_state;
/* */
unsigned long
dirtied_when /* */
unsigned int
i_flags;
/* */
unsigned char
i_sock;
/* ? */
atomic_t
i_writecount; /* */
void
*i_security; /* */
__u32
i_generation; /* */
union {
void
*generic_ip; /*
*/
} u;
};

( , ). ,
. , s t r u c t inode . , i_pipe
. , NULL. , , i_devices, i_bdev, i_cdev.
, ,
inode. ,
, . . , i_ctime
i_mtime.

275


, i n o d e _ o p e r a t i o n s , , VFS . , .
i->i_op->truncate(i)
i .
t r u n c a t e ( ) , , i.
i n o d e _ o p e r a t i o n s < l i n u x / f s . h > ,
.
/*
 * Inode operations table, reached through inode->i_op.
 * Parameter names follow the per-method descriptions given after the listing.
 */
struct inode_operations {
	int (*create)(struct inode *dir, struct dentry *dentry, int mode);
	struct dentry *(*lookup)(struct inode *dir, struct dentry *dentry);
	int (*link)(struct dentry *old_dentry, struct inode *dir,
	            struct dentry *dentry);
	int (*unlink)(struct inode *dir, struct dentry *dentry);
	int (*symlink)(struct inode *dir, struct dentry *dentry,
	               const char *symname);
	int (*mkdir)(struct inode *dir, struct dentry *dentry, int mode);
	int (*rmdir)(struct inode *dir, struct dentry *dentry);
	int (*mknod)(struct inode *dir, struct dentry *dentry, int mode,
	             dev_t rdev);
	int (*rename)(struct inode *old_dir, struct dentry *old_dentry,
	              struct inode *new_dir, struct dentry *new_dentry);
	int (*readlink)(struct dentry *dentry, char *buffer, int buflen);
	int (*follow_link)(struct dentry *dentry, struct nameidata *nd);
	int (*put_link)(struct dentry *dentry, struct nameidata *nd);
	void (*truncate)(struct inode *inode);
	int (*permission)(struct inode *inode, int mask);
	int (*setattr)(struct dentry *dentry, struct iattr *attr);
	int (*getattr)(struct vfsmount *mnt, struct dentry *dentry,
	               struct kstat *stat);
	int (*setxattr)(struct dentry *dentry, const char *name,
	                const void *value, size_t size, int flags);
	ssize_t (*getxattr)(struct dentry *dentry, const char *name,
	                    void *value, size_t size);
	ssize_t (*listxattr)(struct dentry *dentry, char *list, size_t size);
	int (*removexattr)(struct dentry *dentry, const char *name);
};
.
i n t c r e a t e ( s t r u c t inode * d i r , s t r u c t dentry *dentry, i n t mode)
VFS c r e a t ()
open () , (mode) (dentry).

276

s t r u c t dentry * lookup(struct inode * d i r , s t r u c t dentry *dentry)


.
, .

12

i n t l i n k ( s t r u c t dentry *old_dentry, s t r u c t inode * d i r ,


s t r u c t dentry *dentry)
l i n k ()
(hard link) , old_dentry
d i r . ,
dentry.

i n t u n l i n k ( s t r u c t inode * d i r , s t r u c t dentry *dentry)


unlink () , dentry d i r .

int symlink(struct inode *dir, struct dentry *dentry, const char *symname)
symlink() symname ,
dentry d i r .

i n t m k d i r ( s t r u c t inode * d i r , s t r u c t dentry *dentry, i n t mode)


mkdir ()
(mode).

i n t r m d i r ( s t r u c t inode * d i r ,

s t r u c t dentry *dentry)

rmdir ()
dentry d i r .
int mknod (struct inode *dir, struct dentry *dentry, int mode, dev_t rdev)
mknod () ( , ), rdev.
d i r , dentry, mode.

i n t rename(struct inode *old_dir, s t r u c t dentry *old_dentry,


s t r u c t inode *new_dir, s t r u c t dentry *new_dentry)
VFS old_dentry o l d _ d i r new_dir
, new_dentry.

i n t r e a d l i n k ( s t r u c t dentry *dentry,

char *buffer,

i n t buflen)

r e a d l i n k ()
buflen , , , .

i n t f o l l o w _ l i n k ( s t r u c t dentry *dentry, s t r u c t nameidata *nd)


VFS
, . dentry, nameidata, nd.
i n t p u t _ l i n k ( s t r u c t dentry *dentry, s t r u c t nameidata* nd)
VFS follow_link ().

void truncate ( s t r u c t inode *inode).


VFS . i _ s i z e .

277

i n t p e r m i s s i o n ( s t r u c t inode *inode,

i n t mask)

, ,
inode. , , .
NULL, VFS,
.
,
(ACL), permission ().

i n t s e t a t t r ( s t r u c t dentry *dentry,

struct i a t t r *attr)

notify_change () ,
" " ("change event") .

i n t g e t a t t r ( s t r u c t vfsmount *mnt,
struct kstat *stat)

s t r u c t dentry *dentry,

VFS , .

i n t s e t x a t t r ( s t r u c t dentry *dentry, const char *name,


const void *value, s i z e _ t s i z e , i n t flags)
VFS (extended attributes)5 name value , dentry.

i n t g e t x a t t r ( s t r u c t dentry *dentry, const char *name,


void *value, s i z e _ t size)
VFS
(extended attributes) name value.

ssize_t l i s t x a t t r ( s t r u c t dentry *dentry, char * l i s t , size_t size)



, l i s t .
i n t r e m o v e x a t t r ( s t r u c t dentry *dentry, const char *name)
.

dentry
, VFS ,
. / b i n / v i , b i n , vi ,
b i n , , a vi
. . , VFS
5

, 2.6 ,
/ . ,
.
278

12

, ,
, , ,
.
VFS (directory entry dentry). dentry .
/, bin vi .
, . , dentry , .
.
/mnt/cdrom/foo, /, mnt, cdrom foo dentry.
VFS .
dentry s t r u c t d e n t r y
<linux/dcache.h>. ,
, .
struct dentry {
atomic_t
unsigned long
spinlock_t
struct inode
struct list_head
struct list_head

d_count;
/* */
d_vfs_flags; /* dentry */
d_lock;
/* dentry */
*d_inode; /* */
d_lru;
/* */
d_child;
/*
*/
struct list_head
d_subdirs; /* */
struct list_head
d_alias;
/* (alias)
*/
unsigned long
d_time;
/* */
struct dentry_operations *d_op;
/*
*/
struct super_block
*d_sb;
/* */
unsigned int
d_flags;
/* */
int
d_mounted;
/*
*/
void
*d_fsdata;
/* */
struct rcu_head
d_rcu;
/* RCU (read-copy update) */
struct dcookie_struct *d_cookie; /* cookie- */
struct dentry
*d_parent; /* dentry */
struct qstr
d_name;
/* dentry */
struct hlist_node
d_hash;
/* */
struct hlist_head
*d_bucket; /* - */
unsigned char
d_iname[DNAME_INLINE_LEN_MIN]; /* */
};

, dentry
. VFS .
, s t r u c t dentry
, , (..
).

279


, : (used), (unused) (negative).
(..
d_inode mode)
(.. d_count ). VFS, ,
.
dentry inode
( d_inode ), VFS ( d_count
). , , . , ,
, . , ,
.
dentry6 (
d_inode NULL), ,
.
, .
dentry , , .
dentry , ,
.
VFS, .

dentry
VFS ,
, ,
. , dcache.
dentry .
"" dentry,
( i _ d e n t r y inode).
,
dentry, .
dentry " " (least recently used, LRU).
, , , .
, .
6

. . , , invalid denlry
.

280

12

,
, .
- -, dentry.
- d e n t r y _ h a s h t a b l e .
dentry,
.
.
d_hash ( ) ,
-.
- d_lookup ().
dcache , .
NULL.

, / h o m e / d r a c u l a / s r c / f . . ,
(, , ,
), VFS
: /, home, d r a c u l a , s r c , , foo..
( ) ,
, VFS dentry. ,
.
dentry-, VFS . dentry dcache, .
dcache icache.
inode dentry, dentry
.
dentry mode . , , .
, ,
.


d e n t r y _ o p e r a t i o n s , VFS . <linux/dcache.h> .
/* Dentry operations table, reached through dentry->d_op. */
struct dentry_operations {
	int (*d_revalidate) (struct dentry *, int);
	int (*d_hash) (struct dentry *, struct qstr *);
	int (*d_compare) (struct dentry *, struct qstr *, struct qstr *);
	int (*d_delete) (struct dentry *);
	void (*d_release) (struct dentry *);
	void (*d_iput) (struct dentry *, struct inode *);
};

281

i n t d _ r e v a l i d a t e ( s t r u c t dentry *dentry,

i n t flags)

, . VFS , dentry dcache.


NULL, dentry,
, .

i n t d _ h a s h ( s t r u c t dentry *dentry,

s t r u c t q s t r *name)

-
dentry. VFS , -.

i n t d_compare(struct dentry *dentry,


s t r u c t q s t r *name1,
s t r u c t q s t r *name2)
VFS
name1 name2. VFS,
. , FAT, .
FAT ,
,
. dcache_lock 7 .

i n t d_delete ( s t r u c t dentry *dentry)


VFS, d_count
dentry .
dcache_lock.

void d _ r e l e a s e ( s t r u c t dentry *dentry)


VFS,
dentry. .

void d _ i p u t ( s t r u c t dentry *dentry,

s t r u c t inode *inode)

VFS,
(, ). VFS i p u t (),
inode. , i p u t ()
.

dentry->d_lock. . .

282

12

file
VFS .
File , .
VFS ,
, .
, , .
, , file, ( , ),
, read () w r i t e ().
, . ( )
open () c l o s e () . , , ,
. ,
file.
. (, , ),
. inode dentry, , .
s t r u c t f i l e ,
< l i n u x / f s . h > . , .
struct file {
struct list_head
struct dentry
struct vfsmount

f_list;
/* file*/
*f_dentry; /* dentry */
*f_vfsmnt; /*
*/
struct file_operations *f_op;
/* */
atomic_t
f_count;
/* */
unsigned int
f_flags;
/* , open */
mode_t
f_mode;
/* */
loff_t
f_pos;
/* (file pointer, offset) */
struct fown_struct
f_owner;
/*
*/
unsigned int
f_uid;
/* , UID */
unsigned int
f_gid; /* , GID */
int
f_error;
/* */
struct file_ra_state f_ra;
/* */
unsigned long
f_version; /* */
void
*f_security; /* */
void
*private_data; /* */
struct list_head
f_ep_links; /* eventpoll ( ) */
spinlock_t
f_ep_lock; /* eventpoll */
struct address_space *f_mapping; /* */
};

233

, .
, , (dirty)
. file
dentry f_dentry. dentry , ,
.


VFS, . , s t r u c t f i l e , , Unix.
file_operations
<linux/fs.h> .
/*
 * File operations table, reached through file->f_op.
 * Parameter names follow the per-method descriptions given after the listing.
 */
struct file_operations {
	struct module *owner;
	loff_t (*llseek)(struct file *file, loff_t offset, int origin);
	ssize_t (*read)(struct file *file, char *buf, size_t count,
	                loff_t *offset);
	ssize_t (*aio_read)(struct kiocb *iocb, char *buf, size_t count,
	                    loff_t offset);
	ssize_t (*write)(struct file *file, const char *buf, size_t count,
	                 loff_t *offset);
	ssize_t (*aio_write)(struct kiocb *iocb, const char *buf, size_t count,
	                     loff_t offset);
	int (*readdir)(struct file *file, void *dirent, filldir_t filldir);
	unsigned int (*poll)(struct file *file,
	                     struct poll_table_struct *poll_table);
	int (*ioctl)(struct inode *inode, struct file *file,
	             unsigned int cmd, unsigned long arg);
	int (*mmap)(struct file *file, struct vm_area_struct *vma);
	int (*open)(struct inode *inode, struct file *file);
	int (*flush)(struct file *file);
	int (*release)(struct inode *inode, struct file *file);
	int (*fsync)(struct file *file, struct dentry *dentry, int datasync);
	int (*aio_fsync)(struct kiocb *iocb, int datasync);
	int (*fasync)(int fd, struct file *file, int on);
	int (*lock)(struct file *file, int cmd, struct file_lock *lock);
	ssize_t (*readv)(struct file *file, const struct iovec *vector,
	                 unsigned long count, loff_t *offset);
	ssize_t (*writev)(struct file *file, const struct iovec *vector,
	                  unsigned long count, loff_t *offset);
	ssize_t (*sendfile)(struct file *file, loff_t *offset, size_t size,
	                    read_actor_t actor, void *target);
	ssize_t (*sendpage)(struct file *file, struct page *page, int offset,
	                    size_t size, loff_t *pos, int more);
	unsigned long (*get_unmapped_area)(struct file *file, unsigned long addr,
	                                   unsigned long len, unsigned long offset,
	                                   unsigned long flags);
	int (*check_flags)(int flags);
	int (*dir_notify)(struct file *filp, unsigned long arg);
	int (*flock)(struct file *filp, int cmd, struct file_lock *fl);
};

284

12


.
Unix- .
,
. - ,
NULL.
.
loff_t llseek(struct file *file, loff_t offset, int origin)
(file
pointer) offset. lseek().
ssize_t r e a d ( s t r u c t f i l e * f i l e ,
char *buf, size_t count,
loff_t *offset)
count ,
, offset, , buf.
.
read().

ssize_t aio_read(struct kiocb *iocb,


char *buf, size_t count,
loff_t offset)
count
, iocb, ,
buf. aio_read ().

ssize_t w r i t e ( s t r u c t f i l e * f i l e ,
const char *buf, size_t count,
loff_t *offset)
count , offset. write ().

ssize_t aio_write(struct kiocb *iocb,


const char *buf,
size_t count, loff_t offset)
count
, iocb, ,
buf. aio_write.

i n t r e a d d i r ( s t r u c t f i l e * f i l e , void * d i r e n t , f i l l d i r _ t f i l l d i r )
.
readdir ().
unsigned i n t p o l l ( s t r u c t f i l e * f i l e ,
s t r u c t p o l l _ t a b l e _ s t r u c t *poll_table)
, .
p o l l ( ) .

285

int ioctl(struct inode *inode,


struct file *file,
unsigned int cmd,
unsigned long arg)
, /. ,
. i o c t l ( ) .

i n t mmap(struct f i l e * f i l e ,

s t r u c t vm_area_struct *vma)

mmap().

i n t o p e n ( s t r u c t inode *inode,

struct file *file)


. open ().

i n t f l u s h ( s t r u c t f i l e *file)
VFS,
. .

i n t r e l e a s e ( s t r u c t inode *inode, s t r u c t f i l e *file)


VFS,
, , , , c l o s e () .
.

int fsync(struct file *file,


s t r u c t dentry *dentry,
i n t datasync)
fsync()
.

i n t a i o _ f s y n c ( s t r u c t kiocb *iocb,

i n t datasync)

a i o _ f s y n c ()
, iocb.

i n t f a s y n c ( i n t fd, s t r u c t f i l e * f i l e , i n t on)


-.
int lock(struct file *file,

i n t cmd,

s t r u c t file_lock *lock)

286

ssize_t readv(struct file * f i l e ,


const s t r u c t iovec *vector,
unsigned long count,
loff_t *offset)
readv () count ,
vector. .

12

ssize_t writev(struct file *file,


const struct iovec *vector,
unsigned long count,
loff_t *offset)
writev () , vector;
count. .

ssize_t sendfile(struct file *file,


loff_t *offset,
size_t size,
read_actor_t actor,
void *target)
s e n d f i l e ()
. .

s s i z e _ t sendpage(struct f i l e * f i l e ,
s t r u c t page *page,
i n t offset, size_t size,
loff_t *pos, i n t more)
.

unsigned long get_unmapped_area(struct f i l e * f i l e ,


unsigned long addr,
unsigned long len,
unsigned long o f f s e t ,
unsigned long flags)

.

i n t check_flags(int flags)
, f c n t l (), SETFL.
VFS, check_flags ().
NFS.
SETFL f c n t l ( ) . NFS O_APPEND O_DIRECT.
i n t f l o c k ( s t r u c t f i l e * f i l p , i n t cmd, s t r u c t file_lock *fl)
flock(), .

287

,

VFS,
, .
, , , ext3 XFS. .
Linux ,
.
struct file_system_type {
const char
*name;
struct subsystem
subsys;
int
fs_flags;

/* */
/* sysfs */
/* */

/* */
struct super_block * (*get_sb) (struct file_system_type *, int, char*, void * ) ;
/* */
void (*kill_sb) (struct super_block * ) ;
struct module
*owner;
/* ( ) */
struct file_system_type *next;
/* */
struct list_head
fs_supera; /* */
};
get_sb () .
.
f i l e _ s y s tem_type, ,
.
, ,
vfsmount.
, , , .
vfsmount <linux/mount.h> .
struct vfsmount {
struct list_head
struct vfsmount
struct dentry
struct dentry
struct super_block
struct list_head
struct list_head
288

mnt_hash;
/* - */
*mnt_parent; /* */
*mnt_mountpoint; /*
*/
*mnt_root; /*
*/
*mnt_sb;
/* */
mnt_mounts; /* ,
*/
mnt_child; /* , */
12

atornic_t
int
char
struct list_hcad
struct listhead

mnt count; /* */
mnt_flags; /* */
*mnt_devname; /* */
mnt_list; /* */
mnt_fslinkk; /* ,
*/
*mnt_namespace; /* */

struct namespace
};

.
vf smount.
vfsmount mnt_flags. . 12.1 .
1 2 . 1 .

MNT_NOSUID

setuid s e t g i d

MNT_NODEV

MNT_NOEXEC

, , , .

,
, ); , .. VFS ,
. f i l e s _ s t r u c t , fs_struct namespace.
f i l e s _ s t r u c t <linux/file.h>. f i l e s .
. , , .
struct files_struct {
atomic_t
count;
/* */
spinlock_t file_lock;
/* */
int
max_fds;
/* */
int
max_fdset;
/* */
int
next_fd;
/* */
struct file **fd;
/* */
fd_set
*close on exec;/* ,
() */
fd_set
*open_fds;
/* */
fd_set
close_on_exec
init; /*
exec () */
fd_set
open_fds_init; /* */
struct file *fd_array[NR_OPEN_DEFAULT]; /* */
};

289

fd .
fd_array. NR_OPEN_DEFAULT
32, 32 . 32 , fd
.
, .
, , . 32 ,
NR_OPEN_DEFAULT
. , , f s _ s t r u c t , , , fs .
< l i n u x / f s _ s t r u c t . h > .
struct fs struct {
atomic_t
count;
rwlock_l
lock;
int
umask;

/* */
/* */
/* ,
*/
struct dentry
*root;
/* dentry */
struct dentry
*pwd;
/* dentry */
struct dentry
*allroot; /* dentry */
struct vfsmounL *rootmnt; /* */
struct vfsmount *pwdmnt; /* */
struct vfsmount *altrootrnnt; /* */
};


.
, , namespace,
<linux/namespace.h> namespace
. , ,
Linux 2.4. . , ,
, .
, .
struct namespace {
atomic_t
struct vfsmount
struct list_head
struct rw_semaphore
};

count;
*root;
list;
sem;

/*
/*
/*
/*

*/
*/
*/
*/

l i s t
, .
. f i l e s _ s t r u c t f s _ s t r u c t . ,
290

12

CLONE_FILES CLONE_FS, 8
. ,
f i l e s _ s t r u c t , f s _ s t r u c t .
count , , .
n a m e s p a c e -.
(
). c l o n e ()
CLONE_NEWNS, .
,
. , .


Linux
Linux ,
"" ext2 ext3 , NFS Coda.
Linux 50 .
VFS .
,
Linux, a
Unix.
VFS , , inode, dentry
superblock. 12, " ", ,
.

CLONE_FILES CLONE_FS, files_struct fs_struct. ,


, .

291

13

-

- ( , - , block devices) ,
(.. )
, .
- , , ,
- (CD-ROM) -. , .
-.
- ( , character device, char device). ,
, , ..
. . (
), .
, ,
, ..
(seek) , . , . .
"fox", , .
- ,
, . -, , . "f", "" "".
, . - ,
,
. , , .

, , -.
, , . ,
, . - . ,
. , . , ,
. .
, - (block I/O layer). ,
- 2.5.
-, 2.6.


.
, , 512 . ,
. , , , .
, 512 ,
(, -
CD-ROM , 2 ).
,
, . , .. , .
, . -
,
. , (
) , . , , ,
(. 11, " " 12, " ") 1 .

.
, , .

294

13

, , .
512 , .
, . , ,
" " (hardware sector) " "
(device block). ,
, " " (filesystem block) " -" (I/O block). ""
(sector) "" (block), .
. 13.1 .
.
. , , (clusters),
(cylinder, ) (head).
, , ,
. ,
, , - . , ,
.

. 13.1.


(, ), , (buffer).
. ,
. , , .
.
, (,
),
. , (buffer
head) s t r u c t buffer_head.
-

295

b u f f e r _ h e a d ,
<linux/buffer_head.h>.
, .
. struct buffer_head {
unsigned long
b_state;
atomic_t
b_count;
struct buffer_head *b_this_page;

/* */
/* */
/*
*/
struct page
*b_page;
/* */
sector_t
b_blocknr;
/* */
u32
b_size;
/* ( ) */
char
*b_data;
/* */
struct block_device *b_bdev;
/* */
bh_end_io_t
*b_end_io;
/* - */
void
*b_private;
/* */
struct list_head
b_assoc_buffers; /* */
};

b _ s t a t e . , . 13.1.
b h _ s t a t e _ b i t s , <linux/buffer_head.h>.
1 3 . 1 . b h _ s t a t e

BH_Uptodate

BH_Dirty

(
, )

BH_Lock

- ,

BH_Req

BH_Mapped

BH_NEW

BH_Async_Read

BH_Async_Write

BH_Delay

BH_Boundary

b h _ s t a t e _ b i t s
B H _ P r i v a t e S t a r t . , , . , B H _ P r i v a t e S t a r t ;
- , b _ s t a t e .

296

13

, ,
, , -.
b _ c o u n t . , < l i n u x /
b u f f e r _ h e a d . h > .
static inline void get_bh (struct buffer_head *bh)
{
atomic_inc{&bh->b_count);
}
/* Drop a reference on a buffer head by decrementing its b_count. */
static inline void put_bh (struct buffer_head *bh)
{
atomic_dec (&bh->b_count);
}
, g e t _ b h ( ) , ,
. , ,
put_bh ( ) .
, ,
b _ b l o c k n r ,
b_bdev.
, ,
b_page. b _ d a t a ( - b_page),
b _ s i z e . , , b _ d a t a (b_data + b _ s i z e ) .
(.. , ).
- .
2.6
. , - .
-- , -. . , (
), , -
. ,
,
. ,
( ), . 2.6 ,

, .
15, " ",
a d d r e s s _ s p a c e p d f l u s h .

297

, , ,
.
-, , - buffer_head,

. , 2.5
-. bio, .

b i o
-
bio, <linux/bio.h>. - (segment).
, , .. .
, , bio -,
.
bio , .
struct bio {
sector_t
bi_sector;
struct bio
*bi_next;
struct block_device *bi_bdev;
unsigned long
bi_flags;
unsigned long
bi_rw;
unsigned short bi_vcnt;

/*
/*
/*
/*
/*
/*

*/
*/
*/
*/
? */
bio vec
bi_io_vec */
unsigned short bi_idx;
/* bi_io_vec */
unsigned short bi_phys_segments; /* */
unsigned short bi_hw_segments; /*
*/
unsigned int
bi_size;
/* - */
unsigned int
bi_hw_front_size;/* */
unsigned int
bi_hw_front_size;/*
*/
unsigned int
bi_max_vecs;
/*
bio_vecs */
struct bio_vec *bi_io_vec;
/* bio_vec */
bio_end_io_t
*bi_end_io;
/* - */
atomic_t
bi_cnb;
/* */
void
*bi_private;
/* */
bio_destructor_t *bi_destructor; /* */
};

bio () -. . bi_io_vecs, bi_vcnt


bi_idx.
298

13

bio
bi idx

bi_io_vec

bio_vec bio vec

bio_vec bio_vec

page

page

page

biovec,
bio_vcnt

page,
page
-

. 13.2. struct bio, struct b,io_vec u struct page


bi_io_vecs bio_vec,

-. bio_vec
: < , , >, , ,
, .
. bio_vec <linux/bio.h> .
/* One segment of a bio, given as a <page, offset, length> triple. */
struct bio_vec {
	/* pointer to the physical page on which this buffer resides */
	struct page     *bv_page;

	/* the length in bytes of this buffer */
	unsigned int    bv_len;

	/* the byte offset within the page where the buffer resides */
	unsigned int    bv_offset;
};

- bi_vcnt bio_vec, bi _io_vecs.


- bi_idx .
, -
bio. , bio_vec. , . -
bi_io_vec. .
-

299

b i _ v c n t . , -
, b i _ i d x , . . 13.2
b i o , bio_vec page.
b i _ i d x b i o _ v e c , -
-. ,
, RAID (Redundant Array of Inexpensive/Independent
Disks, / ,
), b i o ,
, , RAID . , RAID,
b i o ,
, b i _ i d x ,
,
-.
b i o ,
b i _ c n t . , ,
. .
void bio_get(struct bio *bio)
void bio_put(struct bio *bio)
, ,
, b i o .
b i o , ,
, .
.
, b i o _ p r i v a t e () . , ,
b i o .


b i o . b i o -,
. ,
, .
- , , , . b i o ,
- .
s t r u c t b u f f e r _ h e a d s t r u c t b i o
.

300

13

bio (. 11),
s t r u c t bio ,
.
bio , (direct) - (.. , ; 15 ).
bio -
- (scatter-gather),
.
bio ,
, -, , .
, .
bio , , -, .
buffer_head .
.


(request queue),
-.
request_queue,
<linux/blkdev.h>.
. , .
, , ,
. ,
s t r u c t request.

s t r u c t request, <linux/blkdev.h>.
bio, . , ,
bio (, , ), bio.

301

-
- , , .
, ,
.
, .
.
- , , , . ,
(, merging) (sorting), -

2. ,
- (I/O scheduler).

-
-.
-, . -
(. 4, "
"), . , .
, -
.
, .
, , ,
Unix. , -
-.

.

-
-
. , .

, . "" . - "" .

. ,
,
-.

302

13

- : . . , ,
(,
, ). (,
),
, , . - ,
, . , .
,
.
, ,
.
, . .
? -
. - ,
,
( ) . , ,
,
. , (elevator) .
.
, . -
- ( ) ( , elevator).


-,
. -, ,
Linus Elevator ( ). , ,
! - 2.4. 2.6
, .
, .
, . ,
,
. :

303

(front merging) (back merging). -

, .
, .
.
, , , -
, ,
, .
,
,
( , ).
, .
, . ,
, , . ,
. ,
" " . ,
- .
, - 2.4.
, .
.
, .
, ,
, .
, ,
, .
, , .

-
- (Deadline I/O scheduler, deadline- -) , .
,
- , .
, ,

13

, ,
. .
,
(writes-starving-reads).
, ,
, . . ,
, , , ..
, .
, , ( , ),
(, )
. 3
, " " . ,
.
, .
, .
, . (
), . , , ,
. ,
(
), - ,
.
,
.
,
. ( ),
. ,
. deadline-
. .

. .
, , .

305

-, (expiration time).
500 5
. -
.
(sorted queue). , deadlme- - , 4.
, , . FIFO , FIFO
. ,
FIFO (first-in first-out , ) , . deadline- -
. .
.
, FIFO- FIFO , (.. , , ,
), deadline- FIFO.
, (. 13.3).

FIFO
FIFO


. 13.3.

, deadline- - . , ,
, .
.
, , ,

. .
4

deadline- .
,
, , .

306

13

-
drivers/block/deadline-iosched..

-
-
, . . , , .
, ,
, ,
. , ( )
. - (anticipatory I / O scheduler)
.
-
. . ( )
, deadline-. (anticipation heuristic).
- "
", . , .
, .
( , 6 ). ,
. .
. ,
.
, ,
(.. ,
), ,
. ,
.
, .
, , .

. .
-
.


,
.
,
.
d r i v e r s / b l o c k /
a s - i o s c h e d . .
Linux
. , ,
, , , , .

-

- (Complete
Fair Queuing, CFQ) ,
. -.
CFQ -
, .
, foo foo, bar
bar. . ,
-.
CFQ , ,
-.
CFQ , ( 4)
, .

.
, , , , .
CFQ .

308

13

CFQ d r i v e r s / b l o c k / c f q - i o s c h e d . .
,
, , , .

- n
, , -
noop (no operation, ). , .
.
, ,
.
- n , . ,
. , , FIFO.
n . ,
, . , , -.
, ,
, n .
n drivers/block/noop-iosched..
.

-
2.6 -. .
-. , elevator=<apo>
, <> , . 13.2.
13.2. e l e v a t o r

as

cfq

deadline

noop

(n)

, elevator=cfq , .

309


-,
, - :
bio, -; buffer_head, ;
r e q u e s t , -.
- ,
, -. , -, ,
Linux, 2.4
.
.

310

13

11, " ", ,


. ,
, , ,
. Linux
(virtual memory operating system),
.. .
,
. ,
. ,
.
, , , , ,
. ""
32- 64- . "" , (, 32-
0 4294967295).

, ..
.
. . . .

. .

, , , 41021f000. 32- . , ,
, , 08048000-0804000. (memory area).
.

.
,
. ,
, ,
"Segmentation Fault" ( ).
.

, (text section).
, (data section).
, , ,
bss1 (bss section). (zero page, , ) ,
, ,
.
, , , (
, ).
, BSS
, libc ,
.
, .
.
, , ,
2
malloc() .
(
). , : , ,
, ..

"BSS" . block started


by symbol (,
). , . ,
( ).
( ) ,

.
2

glibc m a l l o c ( )
(), brk().

312

14


, . ,
. s t r u c t mm_struct,
<linux/sched.h> 3 .
,
.
struct mm_struct {
struct vm_area_struct *mmap;
/* */
struct rb_root
mm_rb;
/* - */
struct vm_area_struct *mmap_cache; /* */
unsigned long
free_area_cache; /*
*/
pgd_t
*pgd;
/* */
atomic_t
mm_users;
/*
*/
atomic_t
mm_count; /* */
int
map_count; /* */
struct rw_semaphore mmap_sem; /* */
spinlock_t
page_table_lock; /* - */
struct list_head
mmlist;
/* mm_struct */
unsigned long
start_code; /* */
unsigned long
end code; /* */
unsigned long
start_data; /* */
unsigned long
end_data;
/* */
unsigned long
start_brk; /* "" */
unsigned long
brk;
/* "" */
unsigned long
start_stack; /* */
unsigned long
arg_start; /* */
unsigned long
arg_end;
/* */
unsigned long env_start; /* */
unsigned long
env_end;
/* */
unsigned long
rss;
/* */
unsigned long
total_vm;
/* */
unsigned long
locked_vm; /*
*/
unsigned long
def_flags; /* ,
*/
unsigned long
cpu_vm_mask; /*MacKa TLB */
unsigned long
swap_address; /* */
unsigned
dumpable:l;
/* core? */
int
used_hugetlb; /*
(hugetlb)? */

,
. s t r u c t mm_struct sched.h.

313

mm_context_t

context;

/* ,
*/
int
core_waiters; /* ,
core */
struct completion
*core_startup_donc; /*
core */
struct completion
core_done; /*
core */
rwlock_t
ioctx_l.ist_lock; /*
- (AIO) */
struct kioctx
*ioctx_list; /* - (AIO) V
struct kioctx
default kioctx; /* , */
};
mm_users , . , , mm_users .
mm_count mm_struct.
, mm_users, mm_count .
mm_count . mm_users
(.. ), mm_count . mm_count ,
m m _ s t r u c t , ,
(mm_count) ,
(mm_users).
mmap m m _ r b ,
: .
, - .
- , ,
,
(log (n) ). - " ".
,
,
. mmap , . ,
mm_rb - , . ,
mm_struct
mmlist. init_mm, ink. m m l i s t _ l o c k , k e r n e l / f o r k . .
mmlist_nr, .

314

14


, - ,
mm . , current->mm
. copy_mm() fork() . mm_struct
mm_cachep allocate_mm ().
k e r n e l / f o r k . . m m _ s t r u c t .

, CLONE_VM c l o n e ( ) . . 3, " ", Linux
. Linux .
, .
, CLONE_VM, a l l o c a t e _ m m ( ) ,
mm
. . _mm ().
if (clone_flags & CLONE_VM) {
/*
* current is the parent process and
* tsk is the child process during a fork()
*/
atomic_inc(&current->mm->mm_users);
tsk->mm = current->mm;
}


, , , exit_mm() . . mmput() ,
mm_users . , m m d r o p ( ) ,
mm_count.
,
free_mm(), mm_struct mm_cachep kmem_cache_free(), -.
.

315

mm_struct
,
, . mm
NULL. , .
,
(, ?). , ( ).
,
, , . ,
, , .
, , mm . a c t i v e _ m m
, . ,
mm NULL. , ,
, mm NULL,
. active_mm
,
.
.
,
, .


(memory areas)
, v m _ a r e a _ s t r u c t . <linux/mm.h>. (virtual memory area, VMA).
v m _ a r e a _ s t r u c t . , . , . ,
VMA , , , . - , VFS
( . 12, " ").
, .

316

14

/*
 * Describes one memory area (VMA): a contiguous address interval
 * [vm_start, vm_end) inside the address space given by vm_mm.
 */
struct vm_area_struct {
    struct mm_struct             *vm_mm;            /* owning mm_struct */
    unsigned long                vm_start;          /* first address of the area (inclusive) */
    unsigned long                vm_end;            /* end of the area (exclusive) */
    struct vm_area_struct        *vm_next;          /* next VMA in the list */
    pgprot_t                     vm_page_prot;      /* access permissions */
    unsigned long                vm_flags;          /* flags */
    struct rb_node               vm_rb;             /* this VMA's node in the tree */
    union {     /* linkage into address_space->i_mmap or i_mmap_nonlinear */
        struct {
            struct list_head       list;
            void                   *parent;
            struct vm_area_struct  *head;
        } vm_set;
        struct prio_tree_node prio_tree_node;
    } shared;
    struct list_head             anon_vma_node;     /* entry in the anon_vma list */
    struct anon_vma              *anon_vma;         /* anonymous VMA object */
    struct vm_operations_struct  *vm_ops;           /* operations table */
    unsigned long                vm_pgoff;          /* offset within the file */
    struct file                  *vm_file;          /* mapped file (if any) */
    void                         *vm_private_data;  /* private data */
};

, () . vm_start
() , vm_end ()
. , (vm_end - vm_start)
() . .
vm_mm mm_struct,
VMA. , VMA mm_struct,
. , ,
vm_area_struct,
. , ,
,
vm_area_struct .

VMA
vm_flags ,
<linux/mm.h>.
, . , , VMA ,
, . , vm_flags , , . ,
. . 14.1
vm_flags.

317

1 4 . 1 . VMA

VM_READ

VM_WRITE

VM_EXEC

VM_SHARED

VM_MAYREAD

VM_READ

VM_MAYWRITE

V M _ W R I T E

VM_MAYEXEC

VM_EXEC

VM_MAYSHARE

VM_SHARED

VM_GROWSDOWN

""

VM_GROWSUP

""

VM_SHM

( )

VM_DENYWRITE

VM_EXECUTABLE

VM_LOCKED

VM_IQ

VM_SEQ_READ

, ,

VM_RAND_READ

, ,

VM_DONTCOPY

f o r k ()

VM_DONTEXPAND

remap ()

VM_RESERVED

VM_ACCOUNT

VM_HUGETLB

( h u g e t l b )

VM_NONLINEAR

.
VM_READ, VM_WRITE VM_EXEC - , . . ,
VM_READ VM_EXEC, VM_WRITE.
,
VM_READ VM_WRITE, VM_EXEC . , , VM_READ.
VM_SHARED , ,
.
, (shared
mapping), . , ,
(private mapping).

318

14

VM_IO , - .
mmap () - . , ,
core . VM_RESERVED , . - .
VM_SEQ_READ , (.. ) .
(read-ahead) . VM_RAND_READ
, .. (.. ).
. madvise()
MADV_SEQUENTIAL MADV_RANDOM . , , ,
. ,
.
, .

VMA
vm_ops vm_area_struct ,
VMA. vm_area_struct , , .
vm_operations_struct,
<linux/mm.h> .
struct vm_operations_struct {
void (*open) (struct vm_area_struct *) ;
void (*close) (struct vm_area_struct * ) ;
struct page * (*nopage) (struct vm_area_struct *, unsigned long, int);
int (*populate) (struct vm_area struct *, unsigned long,
unsigned long, pgprot_t, unsigned long, int);
};
.
void open (struct vm_area_struct *area)
,
.
void close(struct vm_area_struct *area)
,
.

319

struct page * nopage(struct vm_area_struct *area,
                     unsigned long address,
                     int unused)
-
(page fault), , .
int populate {struct vm_area_struct *area,
unsigned long address,
unsigned long len, pgprot_t prot,
unsigned long pgoff, int nonblock)
. remap_pages() (prefault)
.


,
: mmap mm_rb. ,
.
v m _ a r e a _ s t r u c t , -.
, mmap, . v m _ a r e a _ s t r u c t vm_next.
( ). vm a r e a _ s t r u c t , mmap.
NULL.
, m m _ r b , - (red-black) . m m _ r b ,
vm_area s t r u c t vm_rb.
- .
- . .
: . , , , , , . ,
, , , ,
, , . ,
( , ) :
. . ,
(log (n) ).
, . ,
. ,
, .
320

14



.
/ pm (1).
, .
, , .
/* Minimal user-space program: does nothing and exits successfully. */
int main(int argc, char *argv[])
{
    return 0;
}
.
. , ,
bss. ,
,
l i b c . s o l d . s o . ,
.
/proc/<pid>/maps .
rml@phantasy:~$ cat /proc/1426/maps
00e80000-00faf000 r-xp 00000000 03:01 208530     /lib/tls/libc-2.3.2.so
00faf000-00fb2000 rw-p 0012f000 03:01 208530     /lib/tls/libc-2.3.2.so
00fb2000-00fb4000 rw-p 00000000 00:00 0
08048000-08049000 r-xp 00000000 03:03 439029     /home/rml/src/example
08049000-0804a000 rw-p 00000000 03:03 439029     /home/rml/src/example
40000000-40015000 r-xp 00000000 03:01 80276      /lib/ld-2.3.2.so
40015000-40016000 rw-p 00015000 03:01 80276      /lib/ld-2.3.2.so
4001e000-4001f000 rw-p 00000000 00:00 0
bfffe000-c0000000 rwxp fffff000 00:00 0

.
-

m ( 1 ) , .
rml@phantasy:~$ pmap 1426
example[1426]
00e80000 (1212 KB)   r-xp (03:01 208530)   /lib/tls/libc-2.3.2.so
00faf000 (12 KB)     rw-p (03:01 208530)   /lib/tls/libc-2.3.2.so
00fb2000 (8 KB)      rw-p (00:00 0)
08048000 (4 KB)      r-xp (03:03 439029)   /home/rml/src/example
08049000 (4 KB)      rw-p (03:03 439029)   /home/rml/src/example
40000000 (84 KB)     r-xp (03:01 80276)    /lib/ld-2.3.2.so

pmap(l) . , , /,
. procps.

321

40015000 (4 KB)      rw-p (03:01 80276)    /lib/ld-2.3.2.so
4001e000 (4 KB)      rw-p (00:00 0)
bfffe000 (8 KB)      rwxp (00:00 0)
mapped: 1340 KB      writable/private: 40 KB      shared: 0 KB

,
bss l i b c . s o ( ). .
, bss ld. so (
). .
, , . ,
bss, ,
, .
1340 , 40
.
,
.
; , , , . ,
, ( ),
, . 1212 , 1212 , , . , ,
1340 , 40
. - .
, ,
00:00
. , (zero page,
). , , , ,
. ,
, , bss.
, , vm_
a r e a _ s t r u c t . (thread),
mm_struct, task_struct.


, , , .
mmap () , ,
.
<linux/mm.h>.

322

14

find_vma()
find_vma() mm/mmap.c.
, vm_end addr.
, , addr , addr. , NULL.

vm_area_struct. , VMA , addr, , . find_vma ()
mmap_cache . ,
,
( 30-40%).
. ,
, .
- .
struct vm_area_struct * find_vma(struct mm_struct *mm, unsigned long addr)
{
struct vm_area_struct *vma = NULL;
if (mm) {
vma = mm->mmap_cache;
if (! (vma && vma->vm_end > addr && vma->vm start <= addr)) {
struct rb node * rb_node;
rb node = mm->mm_rb.rb_node;
vma = NULL;
while (rb_node) {
struct vm_area_struct * vma_tmp;
vma_tmp = rb_entry (rb_node,
struct vm_area_struct, vm_rb);
if (vma_tmp->vm_end > addr) {
vma = vma_tmp;
if (vma_tmp->vm_start <= addr)
break;
rb_node = rb_node->rb_left;
} else
rb_node = rb_node->rb_right;
}
if (vma)
mm->mmap_cache = vma;
}
}
return vma;
}

323

vma_cache ,
VMA . ,
, vm_end addr,
, ,
addr. , , . ,

VMA.
VMA, - .
vma_end addr,
, .
, , a d d r .
VMA , , addr.
, NULL.

find_vma_prev()
find_vma_prev () f i n d vma () , VMA,
addr. mm/mmap.c <linux/mm.h> .
struct vm_area_struct * find vma_prev (struct mm_struct *mm,
unsigned long addr, struct vm_area_struct **pprev)
pprev VMA.

find_VMA_intersection()
f ind_vma_intersection () , . <linux/mm.h> . .
static inline struct vm_area_struct * find_vma_intersection(
struct mm_struct *mm, unsigned long start_addr, unsigned long end addr)
{
struct vm_area_struct *vma;
vma = find_vma (mm, start_addr) ;
if (vma && end_addr <= vma->vm_start)
vma = NULL;
return vma;
}
, , s t a r t _ a d d r , end_addr .
, find_vma() NULL,
f i n d _ v m a _ i n t e r s e c t i o n ( ) .
324

14

find_vma () VMA, find_vma_inters e c t i o n () ,


. ,
find_vma (), , f ind_vma_intersection () NULL.

mmap() do_mmap():

do_mmap() . , VMA,
,
,
. , VMA. do_mmap() ,
, ,
VMA .
do_mmap() <linux/mm.h> .
unsigned long do_mmap(struct file *file, unsigned long addr,
unsigned long len, unsigned long prot,
unsigned long flag, unsigned long offset)

f i l e offset; len .
f i l e offset , () .
(anonymous mapping). , , (file-backed mapping).

addr (, ), .
p r o t .

<asm/mman.h>. ,
. 14.2.
flags VMA.
<asm/mman.h> . 14.3.
14.2.

PROT_READ


VM_READ

PROT_WRITE

VM_WRITE

PROT_EXEC

VM_EXEC

PROT_NONE

325

14.3.

MAP_SHARED

MAP_PRIVATE

MAP_FIXED


addr

MAP_ANONYMOUS

MAP_GROWSDOWN

VM_GROWSDOWN

MAP_DENYWRITE

VM_DENYWRITE

MAP_EXECUTABLE

VM_EXECUTABLE

MAP_LOCKED

VM_LOCKED

MAP_NORESERVE

MAP_POPULATE

(prefault)

MAP_NONBLOCK

- ,
do_mmap() . . ,
. ,
v m _ a r e a _ s t r u c t , vm_area_cachep.
-
vma_link() .
total_vm . , .

mmap()
do_mmap()
mmap() , .
void * mmap2(void *start,
             size_t length,
             int prot,
             int flags,
             int fd,
             off_t pgoff)
mmap2() ,.. mmap().
mmap() , , mmap2(), . . mmap() ,
POSIX, , mmap() ,
.
mmap2() . mmap2 (),
mmap()
.
326

14

munmap() do_munmap():

do_munmap() . <asm/mman.h> .
int do_munmap(struct mm_struct *mm, unsigned long start, size t_len)
, , s t a r t l e n .
, .

munmap()
munmap () , . mmap ()
.
int munmap(void *start, size_t length)
(wrapper) do_munmap ( ) .
/*
 * sys_munmap - system-call wrapper around do_munmap() for the current
 * process. Holds mmap_sem for writing across the call and returns
 * do_munmap()'s result.
 */
asmlinkage long sys_munmap(unsigned long addr, size_t len)
{
    int ret;
    struct mm_struct *mm;

    mm = current->mm;
    down_write(&mm->mmap_sem);
    ret = do_munmap(mm, addr, len);
    up_write(&mm->mmap_sem);    /* release the semaphore taken above */
    return ret;
}


,
, . ,
, ,
.
. . ()
. , .

327

Linux 3.
64- . , ,
32- , . Linux ,
(,
).
" ". .
(page global directory, PGD). PGD
pgd_t. pgd_t
unsigned long. PGD , PMD.
;
(page middle directory, PMD). PMD
pmd_t. PMD (page table entry,
).
pte_t. .

( ).
. ,
. . 14.1
, .
PGD

PMD

mm_struct

page

. 14.1.

2.6.11 Linux 64-


4-, . 32- 3 , . . .

328

14

(,
). pgd .
p a g e _ t a b l e _ l o c k , .
, , <asm/page.h>.
,
.
. ,
(translation loo-

kaside buffer, TLB), , .


, TLB.
, .
.
,
. 2.6
. , , .
fork(). ,
, . , fork().

, . ,
(struct mm_struct) (struct vm_area_struct). , (
mmap()) ( munmap()) . . Linux
,
.
- , -
. !

329

15

Linux ,
(page cache). -
, ,
.
. -,
, .
, . -,
,
. ,

, (temporal locality). ,
,
.
, .
. - (, , -, ,
), , . ,
.

-. 13,
" -",
. , .
- , -

- .
" ", ,
.
, .

-, read() w r i t e ( ) . - , , , .
. .
-
. -
. bread() , .

.
, , . .
, . .
,
,
.


, (page cache) ; .
, , . , , ,
. -, ,
, r e a d ( ) 1 , , , ,
. ,
.

address_space

2.
1

12 ," ", - read() write(),



file->f_op->read()
file->f_op->write().
2
, 86 4 ,

512 . , 8 . , "" .

332

15

, .
,
, .
, Linux ,
.
System V (SVR 4) .
, SVR 4
, s t r u c t vnode.
Linux , ,
.
Linux address_space ( ),
, .
<linux/fs.h> .
/*
 * Describes one cached object (normally a file, identified by host) and
 * the pages cached for it.
 */
struct address_space {
    struct inode             *host;              /* owning inode, if any */
    struct radix_tree_root   page_tree;          /* radix tree of all pages */
    spinlock_t               tree_lock;          /* lock protecting page_tree */
    unsigned int             i_mmap_writable;    /* count of VM_SHARED mappings */
    struct prio_tree_root    i_mmap;             /* tree of all mappings */
    struct list_head         i_mmap_nonlinear;   /* list of VM_NONLINEAR mappings */
    spinlock_t               i_mmap_lock;        /* lock protecting i_mmap */
    atomic_t                 truncate_count;     /* truncate request counter */
    unsigned long            nrpages;            /* total number of pages */
    pgoff_t                  writeback_index;    /* offset where writeback starts */
    struct address_space_operations *a_ops;      /* operations table */
    unsigned long            flags;              /* gfp_mask and error flags */
    struct backing_dev_info  *backing_dev_info;  /* read-ahead information */
    spinlock_t               private_lock;       /* private lock */
    struct list_head         private_list;       /* private list */
    struct address_space     *assoc_mapping;     /* associated buffers */
};

i_mmap .
3
.
nrpages .

,
Edward M. McCreight, SIAM Journal of Computing, May 1985, vol. 14.
2 , P. 257-276.

333

a d d r e s s s p a c e ,
. , host . host NULL,
; , address_space
(swapper).
a_ops ,
VFS. s t r u c t address_space_operations,
<linux/fs.h> .
/*
 * Table of operations on a cached object; struct address_space points at
 * one of these through its a_ops field.
 */
struct address_space_operations {
/* write a page back / read a page in (see the readpage call on a mapping below) */
int (*writepage) (struct page *, struct writeback_control * ) ;
int (*readpage) (struct file *, struct page * ) ;
int (*sync_page) (struct page * ) ;
int (*writepages) (struct address_space *,
struct writeback_control * ) ;
int (*set_page_dirty) (struct page * ) ;
int (*readpages) (struct file *, struct address_space *,
struct list_head *, unsigned);
/* called around the copy from user space in the write path */
int (*prepare_write) (struct file *, struct page *,
unsigned, unsigned);
int (*commit_write) (struct file *, struct page *,
unsigned, unsigned);
sector_t (*bmap)(struct address_space *, sector_t);
int (*invalidatepage) (struct page *, unsigned long);
int (*releasepage) (struct page *, int);
int (*direct_IO) (int, struct kiocb *, const struct iovec *,
loff_t, unsigned long);
};
r e a d _ p a g e w r i t e _ p a g e .
, .
: a d d r e s s _
s p a c e . .
page = find_get_page(mapping, index);
mapping , a index - .
, .
struct page *cached_page;
int error;
cached_page = page_cache_alloc_cold (mapping);
if (!cached_page)
/* placeholder: handle the page allocation failure here */
error = add_to_page_cache_lru (cached_page, mapping, index, GFP_KERNEL);
if (error)
/* placeholder: handle the failure to add the page to the page cache here */
334

15

, , . .
error = mapping->a_ops->readpage(file, page);
.

.
SetPageDirty(page);

w r i t e p a g e ( ) . , ( ), . , , m m / f i l e m a p . ,
.
page = __grab_cache_page(mapping, index, &cached_page, &lru_pvec);
status a_ops->prepare_write(file, page, offset, offset+bytes);
page_fault = filemap_copy_from_user(page, offset, buf, bytes);
status = a_ops->commit_write(file, page, offset, offset+bytes};
.
, . p r e p a r e _ w r i t e (), . .
commit_write().

-, - .
. ,
. " ". , .


,
-, .
( , ,
).
,
a d d r e s s _ s p a c e .
a d d r e s s _ s p a c e (radix tree), p a g e t r e e . - .
. , f i n d _ g e t _ p a g e () r a d i x _ t r e e _ l o o k u p (), .

335

l i b /
r a d i x - t r e e . . <linux/radix-tree.h>.

-
2.6 . -
. - , . ,
. , - NULL.
- .
- .
. .
- ,
, , .
(
) - , .
- , .
2.6 .


Linux .
2.2 : . , . . .
,
.
2.2 , Linux 2.4
. .
, . , , .

336

15

pdflush
(dirty, "") - . .
,
,
.
,
, , .
.
(. ).
2.6 (gang4) pdflush,
( pdflush). ,
pdflush "dirty page flush" (" "). ,
, .
-, pdflush , .
,
, . , , d i r ty_background_ratio s y s c t l .
, wakeup_bdf l u s h ()5
pdflush, background_writeout ().
, ,
.
, .
.


dirty_background_ratio.

, pdf l u s h . , , p d f l u s h
.
-, pdflush
( )

"gang" . ,
-, .

, . wakeup_pdflush ().
, .

337

. ,
. , ,
. , .
, pdflush, wb_kupdate ( ) .
, dirty_expire_centisecs . ,
d i r t y _ e x p i r e _ c e n t i s e c s .
p d f l u s h , , .

/proc/sys/vm sysctl . . 15.1 .
15.1. p d f l u s h

dirty_background_ratio

,
p d f l u s h

dirty_expire_centisecs

, , ,
p d f l u s h

dirty_ratio

,
,

dirty_writeback_centisecs

, , b d f l u s h

laptop_mode

,
(. )

p d f l u s h m m / p a g e - w r i t e b a c k . c f s /
fs-writeback..



.
, . /proc/sys/vm/laptop_mode.
0 . 1
.

. ; ,
, p d f l u s h
-,
338

15

. p d f l u s h ,
, ,
.
, d i r t y _ e x p i r e _ c e n t i s e c s
d i r t y _ w r i t e b a c k _ c e n t i s e c s , 10 .
, -
, , .
Linux , pdflush ,
.
, .

bdflush kupdated
2.6 p d f l u s h b d f l u s h kupdated.
b d f l u s h , .
,
p d f l u s h . b d f l u s h wakeup_bdflush () ,
.
b d f l u s h p d f l u s h .
, b d f l u s h , p d f l u s h
. . , b d f l u s h ,
. p d f l u s h , . ,
, -
, . ,
, ,
.
b d f l u s h , , kupdated, . ,
wb_kupdate () p d f l u s h .
b d f l u s h kupdated
pdflush.

:

b d f l u s h ,
b d f l u s h . ,

339

b d f l u s h - ,
. , . ,
, , b d f l u s h
. ,
, , .
, ,
.
.
.
2.6
p d f l u s h . , p d f l u s h .

. p d f l u s h , p d f l u s h .
MAX_PDFLUSH_THREADS,
8. , p d f l u s h , .
, , MIN_PDFLUSH_THREADS, 2. , p d f l u s h ,
, . p d f l u s h
, . , ,
. , p d f l u s h , .
ce , p d f l u s h
?
p d f l u s h ,
. ,
p d f l u s h (congestion avoidance). , . p d f l u s h
.
p d f l u s h , ,
.
,
b d f l u s h , 2.6
,
. p d f l u s h
.

340

15


.
, -,
pdf lush.

, . , Linux

.

341

16

, , Linux
,
. , ,
, ,
. ,
. , ,
.
,
, , .

"Hello,World!"
,
,
, , ,
.
, , "Hello World!", - . ,
, "Hello, World!".
/*
* hello. - Hello, World!
/*
#include <linux/init.h>
#include <linux/module.h>
#include <linux/kernel.h>
/*
* hello_init - , ,
* ,
* .
*/

static int hello_init(void)


{
printk(KERN_ALERT "I bear a charmed life.\n");
return 0;
}
/*
* hello_exit - , .
*/
static void hello_exit (void)
{
printk(KERN_ALERT "Out, out, brief candle!\n");
}
module_init(hello_init);
module_exit(hello_exit);
MODULE_LICENSE{"GPL");
MODULE_AUTHOR("Shakespeare");
, . h e l l o _
i n i t () m o d u l e _ i n i t ()
. . m o d u l e _ i n i t ()
, , . .
int my_init(void);
, s t a t i c .
i n t .
( , ) ,
. .
. , ..
,
.
h e l l o _ e x i t ()
m o d u l e _ e x i t (). h e l l o _ e x i t (),
. , ,
, .. , .
.
void my_exit(void);
, s t a t i c .

344

16

, ( ,
).
MODULE_LICENSE ()
. , GPL, t a i n t e d (, ).
, , , . , ,
GPL, ,
" GPL" (. " " ).
, MODULE_AUTHOR () .
.


"kbuild", 2.6 , . ,
, , .
.
. ,
.


. , .
, .
d r i v e r s / , .
, .
d r i v e r s / c h a r / ,
d r i v e r s / b l o c k / , USB d r i v e r s / u s b / .
, USB . .
,
, Fish Master XL 2000 Titanium . ,
f i s h i n g d r i v e r s / c h a r / .
M a k e f i l e , d r i v e r s / c h a r / . d r i v e r s / c h a r /
Makefile .
obj-m += fishing/

345

,
f i s h i n g / . , , , CONFIG_FISHING_POLE
( ,
" "). .
obj-$(CONFIG_FISHING_POLE) += fishing/
, d r i v e r s / c h a r / f i s h i n g
Makefile, .
obj-m += fishing.o
f i s h i n g / f i s h i n g . f i s h i n g . . , .,

..
, , , Makefile
.
obj-$(CONFIG_FISHING_POLE) += fishing.o
. ,
.
! Makefile .
obj-$(CONFIG_FISHING_POLE) += fishing.o
fishing-objs := fishing-main.o fishing-line.o
f i s h i n g - m a i n .
f i s h i n g - l i n e . f i s h i n g . .
, gcc . Makefile .
EXTRA_CFLAGS += -DTITANIUM_POLE
d r i v e r s / c h a r / ,
,
(, M a k e f i l e d r i v e r s /
c h a r / f i s h i n g / ) d r i v e r s / c h a r / M a k e f i l e .
, . ,
CONFIG_FISHING_POLE, .

346

16



,
Makefile , .
obj-m := fishing.o
fishing. fishing. .
,
.
obj-m := fishing.o
fishing-objs := fishing-main.o fishing-line.o
f i s h i n g - m a i n .
f i s h i n g - l i n e . f i s h i n g , .
, ,
.
, make
Makefile . .
make -C /kernel/source/location SUBDIRS=$PWD modules
/ k e r n e l / s o u r c e / l o c a t i o n . ,
, , / u s r / s r c / l i n u x ,
- , - .


/lib/modules/
version/kernel. , 2.6.10
/ l i b / m o d u l e s / 2 . 6 . 1 0 / k e r n e l / d r i v e r s / c h a r /
f i s h i n g . , d r i v e r s /
char/.
.
make modules_install
, root.


Linux . , chum b a i t ,
chum b a i t . .
Linux

347

. root .
depmod
, , root
.
depmod -A
/ l i b / m o d u l e s /
version/modules.dep.


insmod.
. ,
. insmod .
. root
insmod module
module , .
.
insmod fishing
rmmod. root .
rmmod module
, .
rmmod fishing
, . modprobe ,
, ,
. .
modprobe root
modprobe module [ module parameters ]
module , . ,
. .
modprobe , , . , .

348

16

modprobe .
root .
modprobe -r modules
modules ,
. rmmod, modprobe , , .
Linux
, .



, CONFIG_FISHING_POLE.
,
.
"kbuild",
2.6,
. , , Kconf ig,
.
,
. d r i v e r s / c h a r / , drivers/char/Kconfig.
, ,
Kconf ig.
source "drivers/char/fishing/Kconfig"
Kconfig, drivers/char/Kconfig.
Kconfig . .
config FISHING_POLE
	tristate "Fish Master XL support"
	default n
	help
	  If you say Y here, support for the Fish Master XL 2000 Titanium with
	  computer interface will be compiled into the kernel and accessible via
	  device node. You can also say M here and the driver will be built as a
	  module named fishing.ko.

	  If unsure, say N.

349

, .
, CONFIG_ , .
, (tristate),
: (Y),
() (N).
,
, (, .
)
bool t r i s t a t e . , ,
.
, .
h e l p , .
.
, , . , , , , ,
.
. depends
,
, . , . ,
.
depends on FISH_TANK
, , CONFIG_FISH_TANK.
s e l e c t d e p e n d s , ,
, . ,
depends, .
.
select BAIT
CONFIG_BAIT CONFIG_FISHING_POLE.
s e l e c t , d e p e n d s &&. depends ,
. ,
, ,
CONFIG_DUMB_DRIVERS CONFIG_NO_FISHING_ALLOWED.
depends on DUMB_DRIVERS && !NO_FISHING_ALLOWED

350

16

t r i s t a t e b o o l if,
. ,
, . , , "Deep Sea Mode" ,
CONFIG_OCEAN.
bool "Deep Sea Mode" if OCEAN
if d e f a u l t , ,
, ,
if.
, . CONFIG_EMBEDDED
, , , ( ). CONFIG_BROKEN_ON_SMP
, ,
. ,
, SMP. .
CONFIG_EXPERIMENTAL
.
, .


Linux , . .
sysfs (. 17, "
kobject sysfs").
.
module_param ()
.
module_param(name, type, perm);
name , , , . t y p e . : b y t e , s h o r t , u s h o r t , i n t . u i n t ,
l o n g , u l o n g , c h a r p , b o o l i n v b o o l .
: ; ; ;
; ; ; ; ; ; ,
, . b y t e char, i n t . . , perm -

351

sysfs. , 0644 (
, ,
), , " | " , S_IRUGO | S_IWUSR (
, ).
, sysfs
.
. , . .
/* module parameter controlling whether live bait is allowed */
static int allow_live_bait = 1;             /* enabled by default */
module_param(allow_live_bait, bool, 0644);  /* exposed as a Boolean parameter */
, ..
a l l o w _ l i v e _ b a i t .
, . module_param_named ().
module_param_named(name, variable, type, perm);
name , a v a r i a b l e , .
static unsigned int max_test = DEFAULT__LINE_TEST;
module_param_named (maximum_line_test, max_test, int, 0 ) ;
, , charp.
, .
static char *name;
module_param(name, charp, 0 ) ;

, .
module_param_string().
module_param_string(name, string, len, perm);
name , s t r i n g ,
, l e n string
( , , , ,
), perm sysfs (
sysfs). .
static char species[BUF_LEN];
module_param_string (specif ies, species, BUF_LEN, 0 ) ;
, . -

352

16

module_param_array() .
module_param_array(name, type, nump, perm);
name , type , a perm
sysfs. nump
, , .
, , name,
. , . ,
.
static int fish[MAX_FISH];
static int nr_fish;
module_param_array(fish, int, &nr_fish, 0444);
, ,
module_param_array_named ().
module_param_array_named(name, array, type, nump, perm);
.
, , MODULE_
PARM_DESC() .
/* documented module parameter: the description is attached with MODULE_PARM_DESC() */
static unsigned short size = 1;
module_param(size, ushort, 0644);
MODULE_PARM_DESC(size, "The size in inches of the fishing pole " \
                 "connected to this computer.");

<linux/moduleparam.h>.


.
, ( ), .
EXPORT_SYMBOL () EXPORT_
SYMBOL_GPL().
, , .
, , .
,
. ( , s t a t i c ) , . , ,
s t a t i c .
, , ( ) API .

353

. , EXPORT_SYMBOL ().
/*
 * get_pirate_beard_color - return the beard color of the pirate object.
 * pirate is defined outside this function; the color values are declared
 * in <linux/beard_colors.h>.
 */
int get_pirate_beard_color(void)
{
    int color = pirate->beard->color;

    return color;
}
EXPORT_SYMBOL(get_pirate_beard_color);
, g e t _ p i r a t e _ b e a r d _ c o l o r ( )
.
,
GPL.
MODULE_LICENSE ( ) . ,
, GPL, .
EXPORT_SYMBOL_GPL(get_pirate_beard_color);
,
, .
.


, , . , Linux, ,
. . ( )

, .
kobject sysfs,
, ,
.

354

16

17
kobject

sysfs

, 2.6. .

.
.
, , .
, ,
.
, .
.

, , .
.

.
, , . , , ,
() . , USB-

, USB,
USB PCI.
, .

k o b j e c t
kobject,
s t r u c t k o b j e c t ,
< l i n u x / k o b j e c t . h > . kobject Object - , # Java.
, , , , .
, kobject,
.
/* Basic kernel object, meant to be embedded in other structures. */
struct kobject {
    char              *k_name;              /* pointer to the object's name */
    char              name[KOBJ_NAME_LEN];  /* inline storage for the name */
    struct kref       kref;                 /* reference counter */
    struct list_head  entry;                /* list linkage */
    struct kobject    *parent;              /* parent object */
    struct kset       *kset;                /* set this object belongs to */
    struct kobj_type  *ktype;               /* type of this object */
    struct dentry     *dentry;              /* dentry for this object in sysfs */
};
k_name .
KOBJ_NAME_LEN, 20 , name, a
k_name .
KOBJ_NAME_LEN , , , ,
k_name .
p a r e n t kobject.
, k o b j e c t , . , sysfs

kobject, .
d e n t r y s t r u c t dentry, sysfs.
kref, ktype k s e t , kobject. e n t r y
kset. .
kobject
. , , s t r u c t cdev,
kobj.

356

17

/* cdev - object representing a character device; embeds a kobject */
struct cdev {
    struct kobject          kobj;
    struct module           *owner;
    struct file_operations  *ops;
    struct list_head        list;
    dev_t                   dev;
    unsigned int            count;
};
kobject ,
, kobject. , ,
kobject, . , cdev
cdev->kobj->parent cdev->kobj->entry.

k t y p e
kobject , ktype. ktype s t r u c t kobj_type,
<linux/kobject.h> .
/* Describes behavior shared by all kobjects of one type. */
struct kobj_type {
    void (*release)(struct kobject *);      /* release callback for a kobject of this type */
    struct sysfs_ops  *sysfs_ops;           /* sysfs read/write operations */
    struct attribute  **default_attrs;      /* array of default attributes */
};
ktype kobject.
,
ktype, "" .
r e l e a s e , , . , , .
sysfs_ops sysfs_ops.
sysfs
. " sysfs".
, d e f a u l t _ a t t r s a t t r i b u t e .
, kobject . . kobject sysfs,
. NULL.

kobject sysfs

357

k s e t
k s e t kobject.
kset , , " ". kset ktype, :
" ?" k s e t
kobject, ktype ,
k o b j e c t .
ktype kset.
kset kobject k s e t . k s e t kset,
<linux/kobject.h> .
struct kset {
struct
struct
struct
struct
struct
};

subsystem
kobj_type
list_head
kobject
kset_hotplug_ops

*subsys;
*ktype;
list;
kobj;
*hotplug_ops;

ktype ktype, , l i s t - kobject , kobj kobject,


, hotplug_ops ,
kobject , .
, subsys struct subsystem, kset.


kset. k s e t kobject, kset, , k o b j e c t
. kset
.
, s t r u c t subsystem.
struct subsystem {
struct kset
struct rw_semaphore
};

ksot;
rwsem;

subsystem kset, kset subsystem

358

17

subsys. , , subsystem.
k s e t , subsystem, k s e t
, ,
.
rwsem s u b s y s t e m - (. 9,
" "),
k s e t . k s e t - ,
.


, , , ( ) (
), .
k o b j e c t ,
. ,
.
k o b j e c t ,
s t r u c t k o b j e c t . k o b j e c t
, , - .
k o b j e c t . k o b j e c t ,
.
k o b j e c t k t y p e ,
s t r u c t k o b j _ t y p e .
ktype k o b j e c t .
ktype :
, , sysfs, .
k o b j e c t , k s e t .
k s e t s t r u c t k s e t .
. -,
k o b j e c t
k o b j e c t . -, k o b j e c t . sysfs k o b j e c t
.
, ,
k s e t .
k s e t . s t r u c t subsystem.
, sysfs,
.
. 17.1 .

kobject sysfs

359

kset

kobj

kobj

kset

kobj

kobj

kobj

kobj

kobj

kobj

. 17.1. kobject,
kset


k o b j e c t
,
k o b j e c t , , kobject
. , kobject. kobject
( -) " "
. , kobject
, ,
, .
k o b j e c t - . kobject k o b j e c t _ i n i t () ,
< l i n u x / k o b j e c t . h > .
void kobject_init(struct kobject *kobj);
kobject, . , , .
,
kobject. memset ().
memset(kobj, 0, sizeof (*kobj));
p a r e n t
kset, .

360

17

/* Allocate a kobject, zero it, fill in kset and parent, then initialize it. */
kobj = kmalloc(sizeof (*kobj), GFP_KERNEL);
if (!kobj)
    return -ENOMEM;
memset(kobj, 0, sizeof (*kobj));
kobj->kset = kset;
kobj->parent = parent_kobj;
kobject_init(kobj);

k o b j e c t _ s e t _ n a m e ( ) , .
int kobject_set_name(struct kobject *kobj, const char *fmt, ...);
, p r i n t f () p r i n t k (). ,
k_name k o b j e c t . ,
name,
.
,
k s e t , k t y p e .
, k s e t
ktype , ktype,
k s e t , . ,
k o b j e c t ktype, !


, k o b j e c t ,
.
. , , ,
(pinned, , ). , ,
.
, .
(getting), (putting)
. ,
, .

kobject_get().
struct kobject * kobject_get(struct kobject *kobj);
kobject
NULL .

kobject_put().
void kobject_put(struct kobject *kobj);

kobject sysfs

361

, , , , r e l e a s e ktype .

k r e f

kref, <linux/kref. h> .
/* Reference counter; the count is held in an atomic_t. */
struct kref{
atomic_t refcount;
};
,
. , . kref, k r e f _ i n i t () .
void kref_init(struct kref *krcf)
{
atomic_set(&kref->refcount, 1 ) ;
}
, a t o m i c _ t , .
, k r e f , kobject.
k r e f ,
k r e f _ g e t ( ) .
/* kref_get - warn if the count is currently zero, then atomically increment it. */
void kref_get(struct kref *kref)
{
WARN_ON(!atomic_read(&kref->refcount));
atomic_inc(&kref->refcount);
}
. . kref,
kref_put ().
void kref_put(struct kref *kref, void (*release) (struct kref *kref))
{
WARN_ON(release == NULL);
WARN_ON(release == (void (*)(struct kref *))kfree);
if (atomic dec_and_test (&kref->refcount))
release (kref);
}
r e l e a s e ( ) , , .
WARN_ON() , release() kfree() ,

362

17

,
s t r u c t kref
.

atomic_t,
kref , .
l i b / k r e f . c
<linux/kref.h>.

sysfs
sysfs ,
kobject. . k o b j e c t
, , .
, sysfs . ,
. , , /, . ,
k o b j e c t , sysfs driverfs.
, k o b j e c t . , 2.6,
sysfs, .
sysfs kobject
d e n t r y , k o b j e c t .
12, " ",
d e n t r y .
,
. k o b j e c t

. , kobject ! , ,
sysfs.
. 17.2 sysfs, /sys.
sysfs : block, bus,
c l a s s , devices, firmware, module power. b l o c k
-.

kobject sysfs

363

/sys
    block/
        fd0
        hda
            dev
            device -> ../../devices/pci0000:00/0000:00:1f.1/ide/0.0
            hda1
                dev
                size
                start
                stat
            hda2
            hda3
            hda4
            hda5
            hda6
            queue
        hdc
        hdd
        loop0
        loop1
        loop2
        loop3
        loop4
        loop5
        loop6
        loop7
        md0
    bus/
    class/
    devices/
    firmware/
    power

. 17.2. /sys

,
. bus . c l a s s
,
. devices
. .
firmware , ACPI, EDD, EFI .. power
.
d e v i c e s ,
.
. , , devices. ,
/ s y s / c l a s s / n e t / .
eth0, device
devices.
/sys Linux,
.
. c l a s s , devices

364

17

bus.
. , , ,
1.


sysfs
kobject
sysfs. sysfs, kobject_add().
int kobject_add(struct kobject *kobj);
sysfs . p a r e n t , , ,
p a r e n t . p a r e n t , ,
kset->kobj.
p a r e n t , kset, ,

sysfs. ,
. p a r e n t k s e t ( ) kobject_add () . ,
kobject sysfs,
kobj->name.
k o b j e c t _ i n i t ()
kobject_add(), k o b j e c t _ r e g i s t e r ().
int kobject_register(struct kobject *kobj);
sysfs
kobject_del().
void kobject_del(struct kobject *kobj);
kobject_unregister()
kobject_del() kobject_put().
void kobject_unregister(struct kobject * kobj);
lib/kobject.c
<linux/kobject.h>.

sysfs, , ,
HAL, hardware abstraction layer ( ), h t t p : / / h a l . f r e e d e s k t o p . o r g / .
HAL
sysfs, , . HAL API,
.

kobject sysfs

365

sysfs
kobject ,
. ? sysfs
, .

,
, , ktype kobject kset. ,
kobject ,
. kobject_type d e f a u l t _ a t t r s ,
a t t r i b u t e . sysfs.
a t t r i b u t e s <linux/sysfs.h>.
/* attribute structure - describes one attribute of a kobject, shown as a file in sysfs */
struct attribute {
    char           *name;   /* name of the attribute */
    struct module  *owner;  /* owning module, if any */
    mode_t         mode;    /* access permissions */
};
name .
sysfs. owner module,
, .
, NULL. mode
mode_t sysfs. ,
S_IRUGO, ,
S_IRUSR. ,
, S_IRUGO | S_IWUSR.
sysfs .
a t t r i b u t e ,
sysfs_ops , . sysfs_ops , < l i n u x / s y s f s . h >
.
struct sysfs_ops {
/* method called when a sysfs file is read */
ssize_t (*show) (struct kobject *kobj,
struct attribute *attr,
char *buffer);
/* method called when a sysfs file is written */
ssize_t (*store) (struct kobject *kobj,
struct attribute *attr,
const char *buffer,
size_t size);
};
366

17

show() . , a t t r , ,
buffer. PAGE_SIZE . PAGE_SIZE 4096 . ,
, , .
s t o r e ( ) . s i z e buffer a t t r . PAGE_SIZE
. , , .
-
, ,
, .


, ktype, kobject, . ,
k o b j e c t -
. ,
kobject.
, sysfs, .
, kobject
. ( ) ,
. sysf s _ c r e a t e _ f i l e ()
.
int sysfs_create_file(struct kobject *kobj, const struct attribute *attr);
a t t r i b u t e ,
a t t r , k o b j e c t , kobj.
, (
). .
, sysfs_ops, ktype . , show () s t o r e {), ,
.
, . sysfs
.
int sysfs_create_link(struct kobject *kobj,
struct kobject *target, char *name);

kobject sysfs

367

name , kobj, , t a r g e t .

.


sysfs_remove_f i l e ().
void sysfs_remove_file(struct kobject *kobj, const struct attribute *attr);

.
, sysfs_create_link (), sysfs_remove_link().
void sysfs_remove_link (struct kobject *kobj, char *name) ;
name , kobj.
< l i n u x / k o b j e c t . h > .
s y s f s _ c r e a t e _ f i l e ( ) sysfs_remove_file() fs/sysfs/
f i l e . , s y s f s _ c r e a t e _ l i n k ( ) sysfs_remove_link()
fs/sysfs/symlink.c.

sysfs
sysfs , , i o c t l ( ) , procfs.
sysfs .
, i o c t l ( ) , sysfs, .
, - ,
ioctl(), /proc.
sysfs , .
-, sysfs .
.
, /.

, sysfs
, . , ,
, .
. . ,

368

17

sysfs , , .
-, sysfs
. "" kobject. kobject
, , . sysfs !
, , sysfs
(Application
Binary Interface, ABI). , ,
sysfs.
, , , .
sysfs .
sysfs .


(kernel event layer) ,
, , , , kobject.
2.6.0 , ,
, , . ,
. , " !", " !", " !", " !" (, , ).
,
kobject sysfs. .
, , , kobject.
sysfs,
sysfs. , , /sys/block/hda. kobject.
, (verb) (action). , ,
(modified) (unmounted).

kobject sysfs

369


( , payload).
, , , sysfs.
n e t l i n k . n e t l i n k - (multicast),
, .
n e t l i n k .
-, , , . ,
, D-BUS2. .
, ,
.

k o b j e c t _ u e v e n t ( ) .
int kobject_uevent(struct kobject *kobj,
                   enum kobject_action action,
                   struct attribute *attr);
kobject, . sysfs, , .
, . , , enum kobject_action .
, ,
(enum). ,
, . <linux/kobject_uevent.h> KOBJ_foo.
: KOBJ_MOUNT,
KOBJ_UNMOUNT, KOBJ_ADD, KOBJ_REMOVE KOBJ_CHANGE. "mount" (), "unmount" (), "add" (), "remove" () "change" () .
, .
a t t r i b u t e .
(payload) .
, , sysfs .
2
D-BUS
http://dbus.freedesktop.org/.

370

17


. , , GFP_ATOMIC.
int kobject_uevent_atomic (struct kobject *kobj,
enum kobject_action action,
struct attribute *attr);
. .
kobject sysfs,
,
sysfs.
lib/kobject_uevent.c
<linux/kobject_uevent.h>.

k o b j e c t
sysfs
, sysfs, k o b j e c t .
:
kset, , , ktype kref.
.
. kobject , .
,
s y s f s _ c r e a t e _ f i l e ( ) , .
, ,

kobject. kobject ,
, !!!
, . , . , !

kobject sysfs

371

18

,
, .
,
. ,
.
, , , ,
. , ,
, , . - ,
.


, ? .
. ,
, . , , , .
, . .
. ,
. , -
. , ,
, , .
, ( ,
?). , , . , ,
.
, .
,
. , .
, , .

(, ), , , .
, , -
. ,
, foo core. -. , .

.
, , , - , ,
, . , .
, , ,
.


, .
.
(, ) (, ). ;
.
, , , . , ,
(race condition).
, ,
, , .
,

NULL, (""),
( ). NULL
"oops", , "" ( , "oops", ). "oops".
: , , ,
.
.
374

18

, , .
,
, (race)
.
, ,
( , ,
).

p r i n t k ( )
p r i n t k ( )
p r i n t f ( ) . . : p r i n t k ( ) , . ,
.

p r i n t k ( )
p r i n t k ( )
. p r i n t k ()
.
. .
- .
, , p r i n t k () , .

p r i n t k ( )
p r i n t k ( ) .
,
. , , ?
, ,
(,
s e t u p _ a r c h ( ) ,
). : - , .
, . , ,

, (, ). ,
.
, ( i386)
, .

375

p r i n t k ( ) ,
e a r l y _ p r i n t k ( ) . p r i n t k ( ) ,
.
, , . , .
,
, p r i n t k ( ) , .


p r i n t k ( ) p r i n t f ( )
(loglevel). ,
. , ,
(console loglevel). .
printk(KERN_WARNING " !\n");
printk(KERN_DEBUG " !\n");
printk(" loglevel!\n");
KERN_WARNING KERN_DEBUG < l i n u x / k e r n e l . h > . , "<4>" "<7>",
, p r i n t k ( ) . ( c o n s o l e _ l o g l e v e l )
. . 18.1
.
18.1. (loglevel)
loglevel

KERN_EMERG

KERN_ALERT

KERN_CRIT

KERN_ERR

KERN_WARNING

KERN_NOTICE

KERN_INFO

KERN_DEBUG

, DEFAULT_MESSAGE_LOGLEVEL, KERN_
WARNING. , .

376

18

KERN_EMERG "<0>", KERN_DEBUG, "<7>". ,


.
printk("<4>o !\n");
printk("<7> !\n");
printk("<4> loglevel!\n");
p r i n t k ( ) . ,
, , . ,
" ", "", "" - .

, KERN_CRIT, - .
KERN_DEBUG
.
.
< l i n u x / k e r n e l . h > .


(log buffer) LOG_BUF_LEN.
CONFIG_
LOG_BUF_SHIFT.
16 . 16 . ,
. ,
.
.
, p r i n t k ( )
. , . ,
. , , , .
,
, .

syslogd klogd
Linux klogd,
. k l o g d / p r o c / k m s g ,
s y s l o g ( ) .

377

/rc. , klogd
, . , ,
. syslogd.
s y s l o g d ,
/ v a r / l o g / m e s s a g e s .
/ e t c / s y s l o g . c o n f .
(console loglevel)
klogd -.


p r i n t k ( )
,
p r i n t f ( ) p r i n t k ( ) . , p r i n t f ( ) . ,
,
.
, ,
p r i n t k ( ) p r i n t f ( ) .
, , .

Oops
oops , - . ,
, , , - . ,
oops. ,

(back trace). , "''
, oops . oops . , ,
oops,
. ,
. .
. , oops,
, . .
oops (idle task, p i d ),
i n i t ( p i d ),
,

378

18

. , oops
, .
oops ,
(memory access violation)
. , oops
, , .
oops , tulip.
Oops: Exception in kernel mode, sig: 4
Unable to handle kernel NULL pointer dereference at virtual address 00000001
NIP: C013A7F0 LR: C013A7F0 SP: C0685E00 REGS: c0905d10 TRAP: 0700
Not tainted
MSR: 00089037 EE: 1 PR: 0 FP: 0 ME: 1 IR/DR: 11
TASK = c0712530[0] swapper Last syscall: 120
GPR00: C013A7C0 C0295E00 C0231530 0000002F 00000001 C0380CB8 C0291B80 C02D0000
GPR08: 000012A0 00000000 00000000 C0292AA0 4020A088 00000000 00000000 00000000
GPR16: 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000
GPR24: 00000000 00000005 00000000 00001032 C3F7C000 00000032 FFFFFFFF C3F7C1C0
Call trace:
[c013ab30] tulip_timer+0x128/0x1c4
[c0020744] run_timer_softirq+0x10c/0x164
[c001b864] do_softirq+0x88/0x104
[c0007e80] timer_interrupt+0x284/0x298
[c00033c4] ret_from_except+0x0/0x34
[c0007b84] default_idle+0x20/0x60
[c0007bf8] cpu_idle+0x34/0x38
[c0003ae8] rest_init+0x24/0x34

(32 - !). oops 86,
, . ,
: .
, . ,
: - : c p u _ i d l e ( ) , d e f a u l t _ i d l e ( ) .
, .
t u l i p _ t i m e r ( ) , NULL.
( 0x128/0x1c4, ) , .
, .
, . ,
. ,
NULL ( ) ,

379

. , ,
(race) . .

ksymoops
oops
, , .
.
NIP: C013A7F0 LR: C013A7F0 SP: C0685E00 REGS: c0905d10 TRAP: 0700
Not tainted
MSR: 00089037 EE: 1 PR: 0 FP: 0 ME: 1 IR/DR: 11
TASK = c0712530[0] 'swapper' Last syscall: 120
GPR00: C013A7C0 C0295E00 C0231530 0000002F 00000001 C0380CB8 C0291B80 C02D0000
GPR08: 000012A0 00000000 00000000 C0292AA0 4020A088 00000000 00000000 00000000
GPR16: 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000
GPR24: 00000000 00000005 00000000 00001032 C3F7C000 00000032 FFFFFFFF C3F7C1C0
Call trace: [c013ab30] [c0020744] [c001b864] [c0007e80] [c00061c4]
[c0007b84] [c0007bf8] [c0003ae8]
. ksymoops
System.map, . , .
ksymoops , .
ksymoops saved_oops.txt
oops. , , ,
,
. , ,
.
ksymoops
Linux.

k a l l s y m s
, ksymoops.
, , , , System.map, oops.
2.5 k a l l s y m s ,
CONFIG_KALLSYMS.

,

380

18

. ,
oops System.map,
kallsyms.
, ,
. , , .


, .
Kernel hacking .
CONFIG_DEBUG_KERNEL.
, .
,
(slab layer debugging),
(high memory debugging), - (I/O mapping debugging), - (spinlock debugging) (stack overflow checking). ,

- (sleep-inside-spinlock checking), -


2.5 . 8, " ", ,
, .
, -, , .
. -
.
, . , , , , ,
. ,
, schedule ()
, ,
, . .

.

381

CONFIG_PREEMPT=y
CONFIG_DEBUG_KERNEL=y
CONFIG_KALLSYMS=y
CONFIG_SPINLOCK_SLEEP=y


, , . BUG()
BUG_ON(). oops,
. oops .
BUG() BUG_ON()
,
oops.

(assertion), , .
if (bad_thing)
BUG();
.
BUG_ON(bad_thing);
panic ().
p a n i c () . ,
.
if (terrible_thing)
panic("foo is %ld!\n", foo);
, . dump_stack(). .
if (!debug_check) {
printk(KERN_DEBUG " . . . \ n " ) ;
dump_stack();
}

SysRq
SysRq, CONFIG_MAGIC_SYSRQ ,
. SysRq . i386
A L T - P r i n t S c r e e n . , 382

18

, .
.
s y s c t l
.
echo 1 > /proc/sys/kernel/sysrq
SysRq-h. SysRq-s
,
SysRq-u , a SysRq-b .
, , r e s e t .
, SysRq, . , ,
. . 18.2 SysRq.
18.2. SysRq

SysRq-b

(reboot)

SysRq-e

S I G T E R M , i n i t

SysRq-h

SysRq

SysRq-i

SIGKILL , i n i t

SysRq-k

: ,

SysRq-l

S I G K I L L , i n i t

SysRq-m

SysRq-o

(shutdown)

SysRq-p

SysRq-r

(raw mode)

SysRq-s

SysRq-t

SysRq-u

Documentation/sysrq.txt,
, .
drivers/char/sysrq.c. SysRq ,
"" , .
.
, .

383



. , , , -
.
,
.
,
.
, ,
. ,
, .
, , , Linux - gdb.

gdb
, GNU.
.
gdb vmlinux /proc/kcore
vmlinux ,
, . zImage, bzImage .
/proc/kcore core, .
, root.
gdb
. , .
global_variable
,
.
disassemble function
-g ( -g
CFLAGS Makefile ), gdb
. ,
. , .
, gdb.
. , (breakpoint).
.
384

18

,
.

kgdb
kgdb , gdb
. . kgdb. (- ,
) gdb. kgdb
gdb: ,
, (watch points),
.. kgdb .
kgdb , , .
Documentation/,
.
kgdb . .

kdb
kgdb kdb. kgdb kdb
. kdb , ,
. ,
.
b r e a k . oops . Documentation/kdb . kdb
h t t p : / / o s s . s g i . c o m / .


, , , .
, , . .

UID
,
"" .
, , . , , -

385

f o r k ( ) , ,
. ,
, f o r k ()
. , .
, .
(UID) , .
if (current->uid != 7777) {
/* .. */
} else {
/* .. */
}
lice , , UID 7777
.
7777. , .


, ,
, ,
. ,
. , . ,
. , .
, .


,
. .
.
, , foo bar. , ,
, .
unsigned long foo_stat = 0;
unsigned long bar_stat = 0;
, .
. , /,
. .
,
SMP . . ,
386

18

, , .


(
), . , .
p r i n t k (),
.
. , , ,
. , , .
static unsigned long prev_jiffy = jiffies; /* */
if (time_after (jiffies, prev_jiffy + 2*HZ)) {
prev_jiffy = jiffies;
printk (KERN_ERR "blah blah blah\n");
}

,
.
. , , .
, . . , - . , .
, , , ,
.
, .
static unsigned long limit = 0;
if (limit < 5) {
limit++;
printk(KERN_ERR "blah blah blah\n");
}
.
,

.
.
( s t a t i c ) , .
.
SMP, ,
. , , , ?

387



, .
, 2.4.18, 2.4.17,
, . , .
, .
, , , . ,
, . , .
, , . ,
.
. , . ,
, ,
. , . , ,
, .
, , . ,
. ,
. . , ,
2.4.11, 2.4.20. ,
2.4.15. 2.4.15 .
2.4.15 , .
2.4.15 2.4.20, 2.4.17. ,
2.4.15 , ,
, , 2.4.13. .
,
. , .
!


, , . , , . Linux,
.
, , , . , .
20, ", "

Linux (Linux Kernel Mail List, LKML).
388

18

19

Linux , .
, , , ,
( ). , Linux , ()
. ,
. Linux
( , ).
, , ,
.
. .
, ,
. .
.
,

. . Minix, OpenBSD
.
, . , ,
- . .
.
. DOS
Windows 9x.
, , .

Linux
. , . , ,
. , , ,
. Linux .
, , . .
.
( ), , , .
, .
. - . kernel/sched.c. , ,
. , context_switch(),
, switch_to() switch_mm()
.
switch_to() switch_mm() , Linux.
Linux ,
.
, ,
arch/< >/ include/asm-<aapaa >/,
< > , , Linux. ,
Intel 8 i386.
arch/i386 include/asm-i386. 2.6 : alpha, arm, cris, h8300, i386, ia64, m68k, m68knommu, mips,
mips64, parisc, ppc, ppc64, s390, sh, sparc, sparc64, um, v850 x86-64.
. 19.1.

Linux
Linux
, Intel i386.

, .
, Linux , i386! 1993 -

390

19

Linux Digital Alpha. Digital Alpha


RISC- 64-
. i386,
. , Alpha , Alpha 8. ,
, , .
,
1
. , ,
.
Linux i386,
1.2 Digital Alpha, Intel x86, MIPS SPARC,
.
2.0
Motorola 68k PowerPC. , 1.2, .
2.2
: ARM, IBM S/390 UltraSPARC.
2.4 ,
15. CRIS, IA-64,
64- MIPS, HP PA-RISC, 64- IBM S/390 Hitachi SH.
2.6
20 Motorola 68k MMU, H8/300, IBM
POWER, v850, x86-64 ,
Linux - Usermode Linux. 64- s390 32 s390, .
,
.
, ARM PowerPC, . , Linux 20 , !


(word) ,
. , (character, 8 ) ( ).
, 16, 32 64. "n-" ,
. , ,
Intel Pentium 32- , , 32 , 4 .
1

. - , !
.

391


. . , , Linux,
2. ,
. ,
long . , Alpha 64 . , , long 64 . i n t 32 . Alpha 64
.
,

. , ,
. , (byte 8 ), (word 16 ),
(double w o r d 32 ) (quad w o r d 64 ),
32-. Linux , .

,
Linux, <asm/types.h> BITS_PER_LONG,
long .
. 19.1.
, , , 3,
.
. ,
, , . Linux long .
ANSI , Linux. ,
, . , , int
4
, long .
2

.
, 64- 64 ,
48 . ,
, , ,
Intel PAE.
3

c h a r , 8 .

, 64- , Linux, i n t l o n g . int 32 , l o n g 64 .


32-ph 32 .

392

19

1 9 . 1 .

alpha

Digital Alpha

arm

ARM StrongARM

32

cris

CRIS

32

h8300

H8/300

32

i386

Intel x86

32

ia64

IA-64

64

m68k

Motorola 68k

32

m68knommu

m68k MMU

32

mips

MIPS

32

mips64

64- MIPS

64

parisc

HP PA-RISC

32 , 64

ppc

PowerPC

32

ppc64

POWER

64

s390

IBM S/390

32 , 64

sh

Hitachi SH

32

sparc

SPARC

32

sparc64

UltraSPARC

64

um

Usermode Linux

32 , 64

v850

v850

32

x86_64

X86-64

64

64

,
. sparc64 32-
, , i n t long 32 .
i n t 32 , long 64 .
.
.
, char 8 (1 ),
, i n t 32 ,
.
short,
16 .
, long .
32, 64 .

393

long , , sizeof(int) == sizeof(long).
, int .


(opaque) ,
, . ,
.
- . ,
t y p e d e f ,
,
.
, . p i d _ t ,
. , , ,
int. ,
, .
: Unix- pid_t
short.
a t o m i c _ t . 9, " ", , .
int, , ,
.
, 32 ,
SPARC.
dev_t, g i d _t u i d _ t .
.
, .
.
, .


, ,
, .
j i f f i e s f l a g s , . u n s i g n e d
long.

394

19

, ,
.
, unsigned i n t . 32-
, 64- .


.
,
, .
, 32- ,
16- , - 8 cookie. , ,
.
<asm/types.h>,
<linux/types.h>. . 19.2 .
19.2.

s8

u8

s16

16-

u16

16-

s32

32-

u32

32-

s64

64-

u64

64-

o .
, ,
typedef . 64- .
typedef signed char s8;
typedef unsigned char u8;
typedef signed short s16;
typedef unsigned short u16;
typedef signed int s32;
typedef unsigned int u32;
typedef signed long s64;
typedef unsigned long u64;

395

32- , .
typedef signed char s8;
typedef unsigned char u8;
typedef signed short s16;
typedef unsigned short u16;
typedef signed int s32;
typedef unsigned int u32;
typedef signed long long s64;
typedef unsigned long long u64;

char
, char
. , char , , .
char ,
-128 127.
, ARM, char ,
0 255.
, , char ,
i 255 -1.
char i = -1;
, char , i - 1 . ,
- 1 ,
.
signed char i = -1;
char, ,
signed char, unsigned char. , .


(alignment) .
, (naturally aligned),
, .
, 32- , , 4 (..
). , 2n , n .
. , RISC,
(trap), , .
, -

396

19

. , , .


, , . ,
, ,
gcc.
, , ,
.
, ,
( -). .
char dog [10];
char *p = &dog[1];
unsigned long l = *(unsigned long *)p;
unsigned char , u n s i g n e d long, , 32-
u n s i g n e d long , .
: " ?', , , .
, , - , . , , .


,
. () .
,
( ).
(union) , , , .
,
, .
(padding).

397


, . , 32 .
struct animal_struct {
    char dog;           /* 1 */
    unsigned long cat;  /* 4 */
    unsigned short pig; /* 2 */
    char fox;           /* 1 */
};

,
. , .
struct animal_struct {
    char dog;           /* 1 */
    u8 __pad0[3];       /* 3 */
    unsigned long cat;  /* 4 */
    unsigned short pig; /* 2 */
    char fox;           /* 1 */
    u8 __pad1;          /* 1 */
};

, . , c a t
4- .
. ,

.
, s i z e o f (foo_struct) 12 32- . ,
.
, .
, ,
.
struct animal_struct {
    unsigned long cat;  /* 4 */
    unsigned short pig; /* 2 */
    char dog;           /* 1 */
    char fox;           /* 1 */
};

8 . -

398

19

. , , , . ,
,
, . , , ANSI ,
5
.

:
, . ,
. ,
, , . , ,
, , .


(byte ordering) ,

.
: ( ) ( , left-most),
( , right-most) .
(big-endian), ( ) , . (little-endian), ( )
, .
- (,
- ). Linux , , , .
. 19.1 ,
. 19.2 .
i386 (little-endian) .
(big-endian)
.

,
, , .
.

399

. 19.1. (big-endian)

. 19.2. (little-endian)

,
1027, .
00000000 00000000 00000100 00000011


, . 19.3.
19.3.

00000000

00000011

00000000

00000100

00000100

00000000

00000011

00000000

,
.
, ,
, .
int x = 1;
if (*(char *)&x == 1)
/* */
else
/* */

, .
400

19

big-endian little-endian
big-endian little-endian
" ", 1726 .
, : (big) (little). ,
"" (big-endian), ,
, "" (little-endian).
, , , , .


, Linux,
< a s m / b y t e o r d e r . h > __BIG_ENDIAN
__LITTLE_ENDIAN, .
i n c l u d e /
l i n u x / b y t e o r d e r / , . .
u32 __cpu_to_be32(u32); /*
big-endian */
u32 __cpu_to_le32(u32); /*
little-endian */
u32 __be32_to_cpu(u32); /* big-endian
*/
u32 __le32_to_cpus(u32); /* little-endian
*/
. , , (,
), .
.

- , , , j i f f i e s . HZ,
. ,

, .
, HZ 86 1000.
, 1000 , . 2.6 86
HZ 100. :
alpha HZ 1024, ARM 100.

401

j i f f i e s ,
1000, , . HZ, .
HZ /* */
(2*HZ) /* */
(HZ/2) /* */
(HZ/100) /* 10 */
(2*HZ/100) /* 20 */

HZ <asm/param.h>.
10, " ".



. , 86, , , 4 . 86, .
! . 19.4
.
19.4.

alpha

PAGE_SHIFT
13

arm

12, 14, 15

P A G E _ S I Z E
8
4 , 16 , 32

cris

13

h8300

12

i386

12

ia64

12, 13, 14, 16

4
4 , 8 , 32 , 64

m68k

12, 13

m68knommu

12

mips

12

mips64

12

12

12

64

12

4
4

parisc

4 , 8
4

s390

12

sh

12

sparc

12,13

4 , 8

sparc64

13

v850

12

x86_64

12

402

19

PAGE_SIZE,
.
PAGE_SHIFT ,
,
. , 86,
4 , PAGE_SIZE 4096, PAGE_SHIFT 12. <asm/page.h>.


9, " ", . ,
.
.
- , ,
- . , rmb() wmb().
9, " ".

,

,
. , , , Linux,
.
,
. SMP,
,
, . .
, SMP-
.
,
,
.
, ( ) kmap ().

403


, ,
.
:
, , , ,
.
:
, ,
, .
:
, , , ,
, ..
, . , .
, , .

404

19

20
,

Linux
.
. ,
, . ,
.

,
Linux, Linux (Linux Kernel
Mail List, , , LKML). Linux ,
, Linux.
,
, - .
300 . ( ) , .
, .
,
subscribe linux-kernel <your@email.address>
m a j o r d o m o @ v g e r . k e r n e l . o r g . - h t t p : / / v g e r . k e r n e l . o r g / , (FAQ) h t t p : //www.tux.org/lkml/.
WWW- ,
Linux. h t t p : / /
www.kernelnewbies.org/, , , ,
, . -

http://www.lwn.net/, Linux Weekly News,


, h t t p : / / w w w . k e r n e l t r a f f i c . o r g , Kernel
Traffic,
Linux . .


, Linux
, . , , Linux,
( ), ,
( ), ,
. ,
, . , , ,
. , , ,
. , , , ,
.
, .
, , . ,
, , . D o c u m e n t a t i o n / C o d i n g S t y l e .

.
.
, "" - . ,
. ,
, . .
,
. ?
, .


, ,
, , - - .
406

20

, .
, .
if (fox) {
dog();
cat();
}
, ,
, ,
if (fox) {
ant();
pig();
} else {
dog();
cat();
}
.
do {
dog();
cat();
} while (fox);
,
.
unsigned long func(void)
{
/* .. */
}
, , ,
.
if (foo)
bar();
1

K&R .


, ,
80 . , 80x24 , .

. , . , , 2- . . . .: .
"", 2005.

407

, , 80 .
,
.
,
,
.
,
, .
static void get_pirate_parrot(const char *name,
unsigned long disposition,
unsigned long feather_quality)
, ,
, .
int find_pirate_flag_by_color(const char *color,
const char *name, int len)
, , .

.
idx, i , , . , theLoopIndex, . " " (Hungarian notation), , . ,
Java Unix, Windows.
, . atty() , . get_active_tty() . - Linux,
BSD.

: .
, . . , i n l i n e .

, .
, . , , , . 408

20

, , , , .
, , ,
, . .
, gcc
C++. ( , ).
/*
* get_ship_speed() -
*
* .
* ,
* .
*/
, , ,
. "XXX: ",
"FIXME: ", .
/*
* FIXME: , dog == cat.
*
*/
.
GNOME-doc, Kernel-doc.
HTML .
make htmldocs
postscript .
make psdocs
.
/**
* find_treasure - ,
* @map -
* @time - ,
*
* pirate_ship_lock.
*/
void find_treasure (int dog, int cat)
{
/* .. */
}
. Documentation/kernel-doc-nanoHOWTO.txt.

409

t y p e d e f

typedef, .
.
t y p e d e f
.
, , , .
typedef .
, typedef.
, ,
t y p e d e f :
. , t y p e d e f ,
.

,
. ,
.
. , Linux, ,
. ,
.

i f d e f
i f d e f
. - .
...
#ifdef CONFIG_FOO
foo();
#endif
...
, CONFIG_FOO ,
foo(), , .
#ifdef CONFIG_FOO
static int foo(void)
{
/* .. */
}
#else
static inline int foo(void) { }
#endif
410

20

foo() . .


, . .
. , 99 "" ,
gcc GNU .
, ,
99, .
struct foo my_foo = {
.a = INITIAL_A,
.b = INITIAL_B,
};
a b struct foo, INITIAL_A INITIAL_B
, . ,
, ANSI ( NULL, ,
0.0). , s t r u c t foo
i n t , 0.
, . .


, Linux, . ,
i n d e n t . i n d e n t GNU, Linux .
GNU,
. Linux, .
indent -kr -i8 -ts8 -sob -l80 -ss -bs -psl <>
scripts/Lindent, indent.


, Linux.
, , . , ,
CREDITS, .

411

(maintainers),
.
, .
, ,
. ,
, MAINTAINERS.
.
,
(kernel maintainer). ,
( ) . , . ,
. , 2.0, 2.4 2.6 .
, , , .


, , , , . , ,
- .
.
, oops ( ). , ,
, .
, .
MAINTAINERS , . ,
, . , Linux l i n u x - k e r n e l @ v g e r . k e r n e l . o r g .
,
.
REPORTING-BUGS
Documentation/oops-tracing.txt.


Linux (patch).
GNU d i f f ( 1 ) ,
p a t c h ( 1 ) . , :
, .
, , , linux-x.y.z
(, tar), a
412

20

l i n u x . ,
.
diff -urN linux-x.y.z/ linux/ > my-patch
- , / u s r / s r c /
linux, root. -u , diff.
. -r , -N ,
, ,
diff.
, .
diff -u linux-x.y.z/some/file linux/some/file > my-patch
, ,
, , .
, , ,
,
. ,
,
.
patch -p1 < ../my-patch
, , - my-patch,
,
. -p1 , (strip)
, .
,
, .
d i f f s t a t ,
( ).
- ,
.
diffstat -p1 my-patch
lkml. p a t c h (1)
, diff,
.


, .
,
, ,
MAINTAINERS.
l i n u x - k e r n e l @ v g e r . k e r n e l . o r g .
,

413

(subject) , , " [PATCH] . ".


, ,
. . ,
.
. , , .
, . . ,
("Insert Inline") - . (attachment)
, .
,
. , API
, ( API )
. - , .
.
! . , ,
, . . ,
!

.
, , ,
. . ,
.
, ,
. Linux ,
, . !

414

20

(),
, , .
, . , .
,
.
,
(next). .
. .1.

next

next

next

NULL

. A.I. &

. ,
(prev). (doubly linked), , . ,
, ..1, (singly linked).
. .2.

NULL

prev

next

prev

next

prev

. .2.

next

NULL


,
n e x t , NULL, , .
, .
(circular linked list),
. ,
. p r e v
. . . .4
.

next

next

next

. A3.

prev

next

prev

next

prev

next

. .4.

Linux
. .


().
, next, ..
.
, . , ,
.

416

,
(head), . , NULL. ,
. ,
. , . , ,
.
.

Linux
Linux . , , ,
, ! ,
.
. , . ,
, .
. , .
.
.
, ,
. ,
( t a s k _ s t r u c t ).


.
,
. 2.1 .
. .
< l i n u x / l i s t . h > , a
.
struct list_head {
struct list_head *next, *prev;
};

l i s t h e a d .
, , . ,

417

,
. (list head).
next , prev . ,
next . , prev . , .
l i s t h e a d .
.
struct my_struct {
struct list_head list;
unsigned long dog;
void *cat;
};

. (,
, ), , ,
.
struct my_struct *p;
/* my_struct .. */
p->dog = 0;
p->cat = NULL;
INIT_LIST_HEAD(&p->list);


, .
struct my_struct mine = {
.list = LIST_HEAD_INIT(mine.list),
.dog = 0,
.cat = NULL
};

, .
static LIST_HEAD(fox);

fox.
-
.
, .

418


.
l i s t head. (inline) ,
< l i n u x / l i s t . h > .
, (1)1
, , . ,
3 3000 .
, , , , .

.
list_add(struct list_head *new, struct list_head *head)
new head. , head .
, .
.
list_add_tail (struct list_head *new, struct list_head *head)
new , head. , , l i s t _ a d d (), head
. , .
.
list_del (struct list_head *entry)
, e n t r y . , , , , e n t r y . .
,
list_head.
.
list_del_init(struct list_head *entry)

419

. l i s t _ d e l ( ) , , l i s t h e a d ,

.
.
list_move(struct list_head *list, struct list_head *head)
l i s t
head.

.
list_move_tail (struct list_head *list, struct list_head *head)
, list_move(),
head.
, , .
list_empty (struct list_head *head)
, , .
.
list_splice(struct list_head *list, struct list_head *head)
, l i s t ,
head.
.
list_splice_init(struct list_head *list, struct list_head *head)
list_splice(), , list, , ,
.

n e x t p r e v ,
( , ) .
, n e x t p r e v .
, ,
. l i s t _ d e l ( l i s t ) ,
_ _ l i s t _ d e l ( p r e v , n e x t ) . , . .
< l i n u x / l i s t . h > .

420


, , . , ,
, ! , .
. ,

, .
, , , (n).
l i s t _ f o r _ e a c h () . list_head. , ,
.
, ,
.
struct list_head *p;
list_for_each(p, list) {
/* list */
}

! ,
. , . my_struct
my_struct,
list. list_entry(), list_head. :
, , ,
, .
struct list_head *p;
struct my_struct *my;
list_for_each(p, mine->list) {
my = list_entry(p, struct my_struct, list);
/*
* my ,
* list
*/
}

list_for_each () for. ,
for (p = mine->list->next; p != mine->list; p = p->next)

, l i s t _ f o r _ e a c h ( ) (prefetch) , ,

421

. , l i s t _ f o r _ e a c h ( ) , , for.
, , . , .
,
list_for_each_prev (), prev, next.
,
. , , . list_for_each_safe()
, .
struct list_head *p, *n;
struct my_struct *my;
list_for_each_safe (p, n, &mine->list) {
my = list_entry (p, struct my_struct, list);
/*
* my
* my_struct
*/
}
,
. .

422

Linux , . , .
, , , . , .
, .
. , , .
,
, .
(seed), . , , , , , .
,
, . , ,
, , ..
- .
. (
). (Claude Shannon)1, , ,

(30 1916-24 2001) Bell Labs.


, 1948 ,
.
.

(John von Neumann)2


, , . , .
, ,
, . , , , ""- "", .
. .
,
(, ) "". , ,
, . ,
. , . ,
. , , .
.
,
.
1.3.30
drivers/char/random.c.


. , ,
. , , , .
, ,
, , . , ,
,
- , , , . , , .
.
, . .
. .1 .

(28 1903-8 1957) (Institute for Advanced Study; Princeton). , . - , - - .

424

, ,
, . , - SHA
. SHA (Secure Hash Algorithm, ) ( ,
message digest),
(National Security Agency, NSA)
(NIST) (
, FIPS 186). , ( ) - ( 128 160 ).
.
-. , (, ) -.
-,
. MD4 MD5. - SHA
, . ,
- .
, .
,
. , . , .
<-



10110001-

->

1111010000->
11100101100000110101
01111010101110100010






add_keyboard_randomness()

00100010110101011000



/dev/random /dev/urandom

. .1.

, . , . ,

425

SHA. SHA , .
.
.
?
, - , .
- , . , . , ,
, . ,
, .
, ,
. ,
. -, .
root, . ,
. ,
, .
.


, ,
. , . ,

. , , , ,
, ,
.
Linux- . .
.
, ,
.


, .
void add_interrupt_randomness(int irq)
void add_keyboard_randomness (unsigned char scancode)
void add_mouse_randomness(__u32 mouse_data)

426

add_interrupt_randomness () , ,
SA_SAMPLE_RANDOM. i r q .
, .
, . (, ) (,
), .
, .
add_keyboard_randomness () - . , , -
.
add_mouse_randomness() . mouse_data
, .
,
.

add_timer_randomness () .
. , , .
.
, ,
. , , . ,
12 , .


.
void get_random_bytes(void *buf, int nbytes)

nbytes ,
buf. ,
. , . ,

TCP.
.

427

unsigned long rand;


get_random_bytes(&rand, sizeof(rand));

, , : /dev/random /dev/urandom. ,
/dev/random, ,
. , . ,
/dev/random , . /dev/urandom
, . .
.
,
.
,

unsigned long get_random(void)


{
unsigned long seed = 0;
int fd;
fd = open("/dev/urandom", O_RDONLY);
if (fd == -1) {
perror("open");
return 0;
}
if (read (fd, &seed, sizeof(seed)) < 0) {
perror("read") ;
seed = 0;
}
if (close(fd))
perror("close");
return seed;
}

$bytes $file, dd.


dd if=/dev/urandom of=$file count=1 bs=$bytes

428

,
, ( ,
). .
.
, ,
. , ,
. , .. ,
.

, ,
, , . ,
, , , ,
. Linux
.
(, ,
). , f, , , ,
.
y=f(x)
.



, .
, , . "-"
. f (x)
O(g (x)) : f
"-" g. .
f(x)

O(g(x)),

, f ( x ) g (x), ,
, , '.
, , , . ,
, ,
.

-
-, ,
(Donald Knuth) "-".
"-" . , 7
6, , 9, 12 65
6. , , ,
'. -
.
f () - g (x),
g() f()
, f (x) g ( ) . ,
"-" ,
.
, -,
"-" "-".
, , , .

, -.
-, , g ( s ) f () .

430


. ,
. , 7
, 7 . , n ,
n . , ,
(n). , ,
? , , , , , , O(1).
. .1 .
.1.
O(g(x))

( )

log(n)

, ()

n!

( )

?
?
30 , 10
? 100 ?

,

, , ,
( n ! ) (2 n ) . , , ,
(n), , , O(1), . , , "-". ,
( g ( ) ) , g().
, , , O(1),
3 . , 3 , ,
, , , ( n ) , . . .

431


, ,
.
. " " ,
, . ,
.
" ",
. Linux

. ! ! !
!




. , , , . ,
, , , . Deitel.
Deitel H., Deitel P. and Choffnes D. Operating Systems. Prentice Hall, 2003.

.
, , , ,
, .
Tanenbaum Andrew. Operating Systems: Design and Implementation. Prentice Hall, 1997.
, Unix- Minix.

Tanenbaum Andrew. Modern Operating Systems. Prentice Hall, 2001. , ,


, Unix Windows.
Silberschatz A., Galvin P. and Gagne G. Operating System Concepts. John Wiley and
Sons, 2001. , " ",
,
. .
, .

Unix

Unix. Unix,
Unix.

Bach Maurice. The Design of the Unix Operating System. Prentice Hall, 1986.
Unix System V,
Release 2.

McKusick M., Bostic K., Karels M. and Quarterman J. The Design and Implementation
of the 4.4BSD Operating System. Addison-Wesley, 1996. 4.4BSD
.
McKusick M. and Neville-Neil G. The Design and Implementation of the FreeBSD
Operating System. Addison-Wesley, 2004. FreeBSD 5.
Mauro J. and McDougall R. Solaris Internals: Core Kernel Architecture. Prentice Hall,
2000.
Solaris.
Cooper C. and Moore C. HP-UX 11i Internals. Prentice Hall, 2004. HP-UX PA-RISC.
Vahalia, Uresh. Unix Internals: The New Frontiers. Prentice Hall, 1995. Unix- , .
Schimmel Curt. UNIX Systems for Modern Architectures: Symmetric Multiprocessing and
Caching for Kernel Programmers. Addison-Wesley, 1994. Unix- .

Linux
, , Linux.
Rubini A. and Corbet J. Linux Device Drivers. O'Reilly and Associates, 2001.
, Linux 2.4.

434

Bovet D. and Cesati M. Understanding the Linux Kernel. O'Reilly and Associates, 2002.
Linux 2.4. .
Mosberger D. and Eranian S. IA-64 Linux Kernel: Design and Implementation. Prentice
Hall, 2002. , Intel Itanium
Linux 2.4 .


, , , .
, Linux. , ,
.
Kogan M. and Deitel H. The Design of OS/2. Addison-Wesley, 1996. OS/2 2.0.
Solomon D. and Russinovich M. Inside Windows 2000. Microsoft Press, 2000.
, Unix.
Richter Jeff. Advanced Windows. Microsoft Press, 1997.
Windows.

API Unix
Unix API
, ,
, .
Stevens W. Richard. Advanced Programming in the UNIX Environment. Addison-Wesley,
1992. , , Unix.
Stevens W. Richard. UNIX Network Programming, Volume 1. Prentice Hall, 1998.
API Unix.
Johnson M. and Troan E. Linux Application Development. Addison-Wesley, 1998.
Linux , .


, ,
.
Knuth Donald. The Art of Computer Programming, Volume 1. Addison-Wesley, 1997.

,
. ( : . . 1. , 3- . - : "", 2000.)

435

Kernighan B. and Ritchie D. The C Programming Language. Prentice Hall, 1988.


. ( :
, . : "",
2005 .)
Hofstadter Douglas. Godel, Escher, Bach: An Eternal Golden Braid. Basic Books, 1999.
,
.

Web-
WWW- , Linux .
Kernel Traffic. Linux (lkml) ( ), h t t p : / /
www.kerneltraffic.org/
Linux Weekly News. ,
Linux , ( ), http://www.lwn.net/
Kernel Newbies. "Kernel Newbies"
,
. http://www.kernelnewbies.org/
Kernel.org. . http://www.kernel.org/
KernelTrap. , , , Linux.
Linux.
. h t t p : / /
www.kerneltrap.org
OS News. , , . http://www.osnews.com/
, . , , . http://tech9.net/rml/
kernel_book/

436

Application Programing Interface, API, 96

E
Exception, 99; 110

G
gcc, 40
, 41
, 41
, 40
Granularity, 174

Linux Kernel Mail List, 405


lkml, 405

P
POSIX, 96

SHA, 425
SMP-, 71

T
Task, 46
Translation lookaside buffer, TLB, 329

V
Virtual memory area, 316
VMA, 316

A

, 311
, SI 1; 313
, 311

address_space, 297; 333


address_space_operations, 334
, 429
, 391
, 177
, 181
change_bit(), 182
clear_bit(), 182
set_bit(), 182
test_and_change_bit(), 182
test_and_clear_bit(), 182

test_and_set_bit(), 182
test_bit(), 182
, 381
atomic_t, 178
, 178
atomic_add(), 180
atomic_add_negative(), 180
atomic_dec_and_test(), 180
atomic_inc(), 180
atomic_inc_and_test(), 180
atomic_read(), 180
atomic_set(), 180
atomic_sub(), 180
atomic_sub_and_test(), 180

, 314
, 335
-, 320
, 294

advisory, 168
dcache_lock, 282
deadlock, 172
deadly embrace, 172
dentry->d_lock, 282
inode_lock, 273
lock contention, 174
mmlist_lock, 314
page_table_lock, 329
self-deadlock, 172
voluntary, 168
xtime_lock, 219
(BKL), 197
lock_kernel(), 198
unlock_kernel(), 198
, 198
, 198
, 172
A, 172
, 159
, 186
, 168; 174
, 168
, 168
, 168
, 173
, 184
, 167
, 168

, 185
, 172; 189
, 199; 222
read_seqbegin(), 199
read_seqretry(), 199
write_seqlock(), 199
write_sequnlock(), 199
, 200
, 200
, 184; 190
DECLARE_MUTEX(), 193
DECLARE_RWSEM(), 195
down(), 192; 193
down_interruptible(), 193
down_read(), 195
down_write(), 195
init_MUTEX(), 193
init_rwsem(), 195
sema_init(), 193
up(), 192
up_read(), 195
up_write(), 195
, 192
, 194
, 195
, 195
, 193; 194
, 192
, 196
, 192
, 194
, 191
, 195
, 192
, 190
, 192
-, 195
, 183
-, 183
read_lock(), 188
read_unlock(), 188
spin_is_locked(), 186
spin_lock(), 184
spin_lock_bh(), 187
spin_lock_init(), 186
spin_lock_irq(), 186
spin_lock_irqsave(), 185
spin_try_lock(), 186
spin_unlock(), 184
spin_unlock_bh(), 187
spin_unlock_irq(), 186
spin_unlock_irqrestore(), 185
write_lock(), 189
write_unlock(), 189

438

, 187
, 188
, 187
, 190
, 190
, 187; 190
,189; 196
, 187
, 186; 381
, 200
, 190
, 187
,188

completion, 196
rw_semaphore, 195
semaphore, 193
, 172
, 196
complete(), 197
wait_for_completion(), 197

RAID, 300
, 301
, 298

bio, 298
bio_vec, 298
buffer_head, 296; 300
request, 301
request_queue, 301
, 295
(TLB), 329
, 295
,377
, 377
, 296

-
, 331

, 45; 311
, 45
, 103
CAP_SYS_TIME, 223

(wall time), 207; 208; 221


(epoch), 222

gettimeofday(),223
settimeofday(), 223
, 207
time_after(), 216

time_after_eq(), 216
time_before(), 216
time_before_eq(), 216
(uptime), 208
(time zone), 223
, 396
, 396
, 397
, 397
, 397
, 396
, 398
, 397
, 397
, 66; 164

, 423
, 295
, 174
coarse, 174
fine, 174
, 174
, 174

, 425

klogd, 377
kupdated, 339
pdflush, 297; 337; 339
dirty_background_ratio, 338
dirty_expire_centisecs, 338
dirty_ratio, 338
dirty_writeback_centisecs, 338
laptop_mode, 338
, 340
syslogd, 377
, 23
, 424
, 430

3
, 46
, 46

, 412
, 413
, 413

diff, 412
diffstat, 413
patch, 412
, 398

, 38; 347
, 38
, 99; 110
, 423

, 23
, 295
, 423

, 34
, 34
, 33
, 35
make config, 36
make defconfig, 36
make gconfig, 36
make menuconfig, 36
make oldconfig, 37
make xconfig, 36
, 35
.config, 36
, 33
, 34
, 37

__BIG_ENDIAN,401
__LITTLE_ENDIAN, 401
BITS_PER_LONG, 392
HZ, 401
PAGE_SHIFT, 403
PAGE_SIZE, 403

CONFIG_PREEMPT, 171
CONFIG_SMP, 171

, 336
, 331
, 331

, 25

, 71; 174
(1),419;431
(n),421;431
, 429
, 391

preemptive, 66
, 66

439

, 66
, 66

-, 430
-, 430

, 343
depmod, 348
EXPORT_SYMBOL(), 353
EXPORT_SYMBOL_GPL(), 353
insmod, 348
Makefile, 346
make modules_install, 347
modprobe, 348
MODULE_AUTHOR(), 345
module_exit(), 344
module_init(), 344
MODULE_LICENSE(), 345; 354
module_param(), 351
module_param_array(), 353
module_param_array_named(), 353
module_param_named(), 352
module_param_string, 352
MODULE_PARM_DESC(), 353
rmmod, 348
, 347
, 348
, 347
, 349
, 351
, 343
, 345
, 353

, 430

, 182
find_first_bit(), 183
find_first_zero_bit(), 183
sched_find_first_bit(), 75
, 132; 187
bottom half, 134
local_bh_disable(), 160
local_bh_enable(), 160
softirq, 135; 136
, 160
, 134; 148 :
, 134; 169; 188
open_softirq(), 140
raise_softirq(), 140
, 137
, 137
,137
ksoftirqd, 138; 147

440

, 139
, 139
, 137
, 140
, 141

softirq_action, 136
tasklet_struct, 141; 144
, 134; 139; 169; 187
DECLARE_TASKLET(), 144
DECLARE_TASKLET_DISABLED(), 144
tasklet_disable(), 145
tasklet_disable_nosync(), 145
tasklet_enable(), 145
tasklet_hi_schedule(), 142
tasklet_init(), 144
tasklet_kill(), 145
tasklet_schedule(), 142; 145
, 142
, 142; 145
, 141
,144

, 311; 316
, 320
, 317
, 319
close(),319
nopage(), 320
open(), 319
populate(), 320
, 317
, 320

vm_area_struct, 319
vm_operations_struct, 319
, 317
, 325

kobject, 356
kobject_get(), 361
kobject_init(), 360
kobject_put(), 361
kobject_set_name(), 361
, 360
, 361
, 360
, 361
, 361
, 361

Linux, 25
Multics, 23
Unix, 23

AT&T, 23
BSD, 24
, 24
,65
, 26
, 311

BUG(), 382
BUG_ON(),382
dump_stack(),382
, 381
, 382
, 385
, 381
SysRq, 382
, 387
ksymoops, 380
kallsyms, 380

gdb,384
kdb,385
kgdb,385
, 136
cancel_delayed_work(), 156
create_workqueue(), 156
DECLARE_WORK(), 154
flush_scheduled_work(), 155
flush_workqueue(), 156
INIT_WORK(), 154
queue_work(), 156
run_workqueue(), 152
schedule_delayed_work(), 155
schedule_work(), 155
work_handler(), 154
work queue, 149
keventd, 157
, 154
, 155
, 134; 157
, 155
, 150
, 150
, 156

cpu_workqueue_struct, 150
work_struct, 151
workqueue_struct, 150

, 312; 325
, 312
, 312
-, 319
, 381
, 318; 322
, , 312

, 322; 325
, 318

add_wait_queue(), 82
DECLARE_WAIT_QUEUE_HEAD(), 82
remove_wait_queue(), 83

MMU, Memory Management Unit, 233


, 311

, 381
, 403
, 234; 311

__alloc_percpu(), 261
__get_free_pages(), 238
alloc_page(),238
alloc_pages, 238
alloc_percpu(), 261
DEFINE_PER_CPU(), 260
get_zeroed_page(), 238
gfp_mask, 238; 241
kmalloc(), 240
kmap(), 257
kmap_atomic(), 258
page_address(), 238
vmalloc(),246
, 247
, 258
, 240
, 245
, 242
, 241
, 245
, 238
, 245
, 257
, 257
(per-CPU), 259
(slab layer, slab
allocator), 248
, 248
, 257
, 238;
240; 246
, 243
, 313
, 315
, 314
, 315
, 41
, 235

441

ZONE_DMA, 235
ZONE_HIGHMEM, 235
ZONE_NORMAL, 235
(high memory), 234; 236
(low memory), 236
, 263
, 311

__free_pages(), 239
free_page(),239
free_pages(), 239
free_percpu(), 261
kfree(), 245
kunmap(), 258
kunmap_atomic(), 259
vfree(), 247
(, DMA), 235

get_cpu_ptr(), 262
get_cpu_var(), 260
put_cpu_ptr(), 262
put_cpu_var(), 260
, 47
kmem_cache_alloc(), 254
kmem_cache_create(), 252
kmem_cache_destroy(), 253
NUMA, 249
, 250
, 249
, 381
(slab), 249

, 381
, 312
, 42; 256
, 233
page_count(), 234
, 234
, 234
, 318
, 297
, 337
, 54
, 312
, 402
, 234
, 235

kmem_cache_s, 250
mm_struct, 313
page, 234;299
slab, 250
vm_area_struct, 316; 319
zone, 237
, 234

442

pseudo-concurrency, 169
race condition, 164

SMP-, 170
, 170
, 170
, 169
, 163
, 164
, 57
, 403
,169
, 163;
170
, 164
"", 164
, 42; 164
, 87
, 43; 389

, 66

, 68
, 68
, 65

O(1), 66; 71; 175


, 83
, 74
-, 302
CFQ, 308
deadline, 304
noop, 309
, 305
, 303
, 302
, 307
, 302
, 302
, 304
, 309

, 308
, 74
, 76
, 76
, 72; 174
, 89
, 67
SCHED_FIFO, 89
SCHED_RR, 90

prio_array, 74
runqueue, 72
, 358

, 180
, 403
, 180; 202
barrier(), 204
mb(), 203
read_barrier_depends(), 203
rmb(), 202
smp_mb(), 204
smp_rmb(), 204
smp_wmb(), 204
wmb(), 203
, 203
, 204
, 202
, 403

__be32_to_cpu(),401
__cpu_to_be32(),401
__cpu_to_le32(),401
__le32_to_cpus(),401
big-endian, 399
little-endian, 399
,399
,400
, 399
,399
, 45; 57; 315
, 59; 316
kernel_thread(), 59
, 164; 169
, 201
, 200
preempt_disable(), 201
preempt_enable(), 201
, 403
preempt_count, 89; 160
, 27; 109; 169; 427
/proc/interrupts, 123
do_IRQ(), 122
handler, 111
interrupt request line, 110
interrupt service routine, 111
IRQ, 110
ret_from_intr(), 123
, 111; 131
, 111; 119
, 110
, 111; 132
, 111
add_interrupt_randomness(), 122
free_irq(), 114
request_irq(), 112
RTC, 117
shared, 116
, 113

, 115
, 114
, 112
, 116
, 113; 116
, 121
, 124
cli(), 126
disable_irq(), 126
disable_irq_nosync(), 126
enable_irq(), 126
in_interrupt(), 127
in_irq(), 127
irq_disabled(), 127
local_irq_disable(), 125
local_irq_enable(), 125
local_irq_restore(), 125
local_irq_save(), 125
sti(), 126
synchronize_irq(), 126
, 125
, 125

SA_INTERRUPT, 113; 117


SA_SAMPLE_RANDOM, 113; 427
SA_SHIRQ, 113; 116

,27
,51
, 27; 51

I/O-bound, 67
ink, 52
parent, 52
processor-bound, 67
runnable, 65
timeslice, 66
wake_up(),83
, 51; 311
, 70
, 88
, 88
, 65
, 46; 289
, 47
, 61
, 59
, 48
, 52
, 66; 69; 209
, 52; 104
, 290
current, 49
, 65
-, 67

443

, 67

wake_up(), 232
, 45

nice, 68
, 61
, 46
, 68; 78
(namespace), 291 .
, 46
, 53
, 50
set_current_state(), 51
set_task_state(), 51
sleep, 81
TASK_INTERRUPTIBLE, 50; 81; 230
TASK_RUNNING, 50; 70
TASK_STOPPED, 51
TASK_UNINTERRUPTIBLE, 51; 81; 230
TASK_ZOMBIE, 51
, 81
, 81
, 170

task_struct, 46
thread_info, 47
. 290
, 61

need_resched, 83; 212


, 423
, 423

add_interrupt_randomness(), 426
add_keyboard_randomness(), 426
add_mouse_randomness(), 426
get_random_bytes(), 427

, 338

, 345
, 320; 415
, 417
, 415
, 418
, 416
, 415

__list_for_each(), 422
list_add(), 419
list_add_tail(),419

444

list_del(),419
list_del_init(),419
list_empty(), 420
list_entry(), 421
list_for_each(),421
list_for_each_prev(), 422
list_for_each_safe(), 422
list_move(), 420
list_move_tail(),420
list_splice(),420
list_splice_init(), 420
, 416; 421
, 417
.
bss, 312
, 312
, 312
, 294
, 294
, 26
, 27
errno, 97
errnosys_call_table, 98
getpid(), 97
int $0x80, 99
ioctl(), 101; 368
mmap(), 326
mmap2(), 326
munmap(), 327
sys_ni_syscall(), 98
syscall, 97
syscall(), 106

..

, 106
asmlinkage, 98
, 98
, 101
, 100
, 91
nice(),91
sched_get_priority_max(), 91
sched_get_priority_min(), 91; 92
sched_getaffinity(), 91
sched_getcheduler(), 91
sched_getparam(), 91
sched_getscheduler(), 91
sched_rr_get_interval(), 91
sched_setaffinity(), 91
sched_setparam(), 91
sched_setscheduler(), 91
sched_yield(),91;92
, 99

sched_getaffinity(), 92
sched_setaffinity(), 92
, 101

,104
entry.S, 98; 105

Oops, 378
, 376
, 32; 405
maintainers, 412
, 412
, 412
, 32; 405
, 331

, 378
, 219
, 406
ifdef, 410
indent, 411
typedef, 410
, 407
, 408
, 411
, 408
, 406
, 406
,408
, 331

address_space, 332
address_space_operations, 334

attribute, 357; 366


cdev, 356
kobj_type, 357
kobject, 356
kref, 362
kref_get(), 362
kref_init(), 362
kref_put(), 362
kset, 358
list_head, 417
subsystem, 358
, 174

, 327
PGD, 328
PMD, 328
, 328
, 328
, 328
, 329
,328
, 139
APIC, 219

(tick), 208
, 211
, 218
, 208; 223
, 226
, 223
BogoMIPS, 229
mdelay(), 229
udelay(),229
, 229
, 232
, 227
(tick), 208

HZ, 209
USER_HZ, 217

add_timer(),225
del_timer(), 225
del_timer_sync(), 226
init_timer(),224
mod_timer(),225
schedule_timeout(), 230

jiffies, 225
jiffies_64, 214
xtime, 219; 221
jiffies, 213
, 401
, 215
, 207; 208; 219
(PIT), 218
, 207; 218
, 219
, 208

timer_list, 224
timespec, 222
(TSC), 219
(tick rate), 208; 209
, 117; 218
, 135; 223

char, 396
s16, 395
s32, 395
s64, 395
s8, 395
u16, 395
u32, 395
u64, 395
u8,395

, 378

445


, 319
-, 294

kobject_uevent(), 370
kobject_uevent_atomic(), 371

, 293
, 293
, 355

System.map, 380

/, 363
sysfs, 352; 363
HAL, 365
kobject_add(), 365
kobject_del(),365
kobject_init(), 365
kobject_register(), 365
kobject_unregister(), 365
sysfs_create_file(), 367
sysfs_create_link(), 367
sysfs_remove_file(), 368
sysfs_remove_link(), 368
, 366
, 357; 366
, 363
show(), 367
store(), 367
, 368
sysfs_ops, 366
, 366
, 294
(VFS), 265
(LVM), 273
, 365
(directory), 268
(metadata), 268
(mount), 288
mnt_flags, 289
, 266
, 269

alloc_inode, 273
clear_inode, 274
delete_inode, 273
destroy_inode, 273
dirty_inode, 273
drop_inode, 273
put_inode, 273

446

put_super, 273
read_inode, 273
remount_fs, 274
statfs, 273
sync_fs, 273
umount_begin, 274
unlockfs, 273
write_inode, 273
write_super, 273
write_super_lockfs, 273

aio_fsync, 286
aio_read, 285
aio_write, 285
check_flags, 287
fasync, 286
flush, 286
fsync, 286
get_unmapped_area, 287
ioctl, 286
llseek, 285
lock, 286; 287
mmap, 286
open,286
poll, 285
read, 285
readdir, 285
readv, 286
release, 286
sendfile, 287
sendpage, 287
write, 285
writev, 287

create, 276
follow_link, 277
getattr, 278
link, 277
listxattr, 278
lookup, 276; 278
mkdir, 277
mknod, 277
permission, 278
put_link, 277
readlink, 277
removexattr, 278
rename, 277
rmdir, 277
setattr, 278
setxattr, 278
symlink, 277
truncate, 277
unlink, 277

d_compare, 282

d_delete, 282
d_hash, 282
d_iput, 282
d_release, 282
d_revalidate, 282
Unix, 267
, 55
(namespace), 267; 291
(path), 268
, 266

dentry, 279; 356


dentry_operations, 270; 281
file, 283
file_operations, 270; 284
file_struct, 270
file_system_type, 270; 288
files_struct, 289
fs_struct, 270; 289
inode, 274
inode_operations, 270; 276
namespace, 270; 289
super_block, 271
super_operations, 270; 272
vfsmount, 270; 288
(superblock), 268; 269; 270
(control block), 270
(inode), 268; 269; 274
(icache), 281
(file), 267; 269; 283
(dentry), 268; 269; 279
LRU, 280
(dcache),280
, 280
-, 281

bread(), 332
clone(),54;57
commit_write(), 335
context_switch(), 390
copy_from_user(), 102
copy_mm(), 315
copy_process(), 55
copy_to_user(), 102
dup_task_struct(), 55
early_printk(),376
exec(), 54
exit(),46;59
exit_mm(), 315
find_vma(), 323

find_vma_intersection(),
find_vma_prev(), 324
fork(),46;54;315
madvise(), 319
mmap(),319;325
munmap(), 327
panic(), 382
prepare_write(), 335
printk(), 39; 375
readpage(), 335
release_task(), 61
schedule(),76;87;104
SetPageDirty(),335
switch_mm(), 390
switch_to(),390
unhash_process(), 61
vfork(),54;56
vma_link(), 326
wakeup_bdflush(), 337
wb_kupdate(), 338

324

X
- , 336

, 295

, 423
, 424
, 424

Linux, 30
, 31
, 31
, 31
, 31
tainted, 345
Unix, 29
, 374
, 29
, 343
, 29; 343
,38
, 373
, 369
,29

447

Linux
2-
..
..
.
JI.A. , .. ,
.. , ..

""
101509, . , . , . 43, . 1
27.07.2006. 70x100/16.
Times. .
. . . 36,12. .-. . 28,86.
3000 . 2169.

CtP
" " . . .
197110, -, ., 15.

Linux


Linux. ,
, ,

, ,
.
Linux,
,
.
,
.
Linux.
Linux
2.6. ,
,
, ,
, , VFS, ,
.
, 2.6,
O(1), , -
-.






Linux
.


Linux,

GNOME


Ximian Desktop
Novell.
,
,

,
,
,
,

.
schedutils
GNOME.



.

...
Linux
2.6


(Andrew Morton),

Linux 2.6.



Novell, Inc.

:

Linux
:

www.novellpress.com

ISBN 5-8459-1085-4
06171

www.williamspublishing.com

Novell -
. Novell Press Ximian -
Novell, Inc., . Linux .