第六章.使用 /proc 來輸入
6.1 TODO:寫一章關于sysfs
這個現在僅是個預留位置。最終我希望看到一個(尚待撰寫的)關于sysfs的章節來替代這裡。如果你熟悉sysfs,并且想要參與撰寫這一章,那麼請聯系我們(LKMPG維護者)。
第七章.談談裝置檔案
7.1談談裝置檔案(writes and IOCTLs)
裝置檔案應該代表實體裝置。大多數實體裝置既用來輸入也用來輸出,所以核心中的裝置驅動必須有某種機制,來取得程序要發送給裝置的輸出。做法是以輸出方式打開裝置檔案并對它進行寫入,就像寫入一個普通檔案一樣。在下面的例子中,這是通過 device_write 來實作的。
但這并不總是足夠的。想象一下,你有一個序列槽連接到了一個貓(數據機,modem)(即使你用的是内置貓,從CPU的觀點來看它依舊是作為連接到數據機的序列槽來實作的,所以你不必太費想象力)。最自然的做法就是使用裝置檔案向數據機寫東西(要麼是數據機指令,要麼是要通過電話線發送的資料)和從數據機讀東西(要麼是指令的回複,要麼是通過電話線收到的資料)。然而,這留下了一個問題:當你需要與序列槽本身對話時該怎麼辦,例如設定發送和接收資料的速率。
在Unix中的回答是使用一個特殊的函數叫做 ioctl(Input Output ConTroL的縮寫)。每一個裝置都能有自己的ioctl指令,這些指令可以是讀 ioctl(從程序向核心發送資訊)、寫 ioctl(把資訊傳回給程序)、兩者兼有,或者兩者都不是。
ioctl 函數被調用時有3個參數:
1. 對應裝置檔案的檔案描述
2. ioctl number
3. 一個長整形,你能用它來傳遞東西(一般為指針指向結構)
ioctl number 對主裝置号、ioctl類型、指令和參數類型進行編碼。這個ioctl number通常由頭檔案中的宏調用(_IO、_IOR、_IOW 或者 _IOWR,取決于類型)來建立。這個頭檔案應該同時被使用ioctl的程式(這樣它們才能産生對應的 ioctl)和核心模組(這樣核心才能了解它)包括(include)進去。在下面的例子中,頭檔案是 chardev.h,使用它的程式是 ioctl.c。
如果你想在你自己的核心模組中使用 ioctl,最好申請一個官方配置設定的ioctl号,這樣假使你意外地得到了别人的 ioctl,或者别人得到了你的 ioctl,你就會知道出了問題。想要更多資訊,請查閱核心源碼樹中的 Documentation/ioctl-number.txt。
例子7-1.chardev.c
注意:在較新版本的核心中,file_operations 結構不再有 .ioctl 成員(改為 unlocked_ioctl),所以下面的例子在新核心上無法直接編譯。
/*
* chardev.c - Create an input/output character device
*/
#include <linux/kernel.h> /* We're doing kernel work */
#include <linux/module.h> /* Specifically, a module */
#include <linux/fs.h>
#include <asm/uaccess.h> /* for get_user and put_user */
#include "chardev.h"
#define SUCCESS 0
#define DEVICE_NAME "char_dev"
#define BUF_LEN 80
/*
 * Non-zero while the device file is held open. device_open() refuses a
 * second opener while this is set, which prevents concurrent access to
 * the same device.
 */
static int Device_Open = 0;
/*
 * The message the device hands back when it is read. It is filled in by
 * device_write() and holds at most BUF_LEN bytes.
 */
static char Message[BUF_LEN];
/*
 * Read cursor into Message: how far the reading process has got.
 * Needed when the message is larger than the buffer passed to a single
 * device_read() call, so the next call can continue where this one
 * stopped.
 */
static char *Message_Ptr;
/*
* This is called whenever a process attempts to open the device file
*/
static int device_open(struct inode *inode, struct file *file)
{
#ifdef DEBUG
printk(KERN_INFO "device_open(%p)\n", file);
#endif
/*
* We don't want to talk to two processes at the same time
*/
if (Device_Open)
return -EBUSY;
Device_Open++;
/*
* Initialize the message
*/
Message_Ptr = Message;
try_module_get(THIS_MODULE);
return SUCCESS;
}
/*
 * Called when a process closes the device file. Marks the device as
 * free for the next opener and drops the module reference taken in
 * device_open(). Always returns SUCCESS.
 */
static int device_release(struct inode *inode, struct file *file)
{
#ifdef DEBUG
    printk(KERN_INFO "device_release(%p,%p)\n", inode, file);
#endif

    /* The single-opener slot is available again. */
    Device_Open -= 1;

    /* Balance the try_module_get() done at open time. */
    module_put(THIS_MODULE);

    return SUCCESS;
}
/*
 * Called whenever a process which has already opened the device file
 * attempts to read from it.
 *
 * Copies bytes from the current read cursor (Message_Ptr) into the
 * user-space buffer until either `length` bytes have been copied or the
 * terminating zero byte of the message is reached, advancing the cursor
 * as it goes.
 *
 * file   - the open file (only used for debug output)
 * buffer - user-space buffer to fill
 * length - capacity of `buffer` in bytes
 * offset - unused by this driver
 *
 * Returns the number of bytes placed in the buffer, 0 at end of message
 * (end of file), or -EFAULT if the very first byte could not be written
 * to user space.
 */
static ssize_t device_read(struct file *file,   /* see include/linux/fs.h */
                           char __user *buffer, /* buffer to be filled with data */
                           size_t length,       /* length of the buffer */
                           loff_t *offset)
{
    /* Number of bytes actually written to the buffer. */
    ssize_t bytes_read = 0;

#ifdef DEBUG
    printk(KERN_INFO "device_read(%p,%p,%zu)\n", file, buffer, length);
#endif

    /* At the end of the message: return 0, which signifies end of file. */
    if (*Message_Ptr == 0)
        return 0;

    while (length && *Message_Ptr) {
        /*
         * The buffer lives in user space, so a plain assignment would
         * not work; put_user() does the copy and returns non-zero when
         * the destination is not writable.
         */
        if (put_user(*Message_Ptr, buffer)) {
            /* Report what was already delivered, or the fault if nothing was. */
            if (bytes_read == 0)
                bytes_read = -EFAULT;
            break;
        }
        Message_Ptr++;
        buffer++;
        length--;
        bytes_read++;
    }

#ifdef DEBUG
    printk(KERN_INFO "Read %zd bytes, %zu left\n", bytes_read, length);
#endif

    return bytes_read;
}
/*
 * Called when somebody tries to write into our device file.
 *
 * Replaces the device message with up to BUF_LEN - 1 bytes taken from
 * the user-space buffer, zero-terminates it, and rewinds the read cursor
 * to the start of the message. The terminator matters because
 * device_read() walks Message until it finds a zero byte; without it a
 * full-length or shorter-than-before write would let the reader run past
 * the new data.
 *
 * file   - the open file (only used for debug output)
 * buffer - user-space source buffer
 * length - number of bytes the caller offers
 * offset - unused by this driver
 *
 * Returns the number of input bytes consumed, or -EFAULT if the first
 * byte could not be read from user space.
 */
static ssize_t
device_write(struct file *file,
             const char __user *buffer, size_t length, loff_t *offset)
{
    /* Leave room for the terminating zero byte. */
    size_t limit = length < BUF_LEN - 1 ? length : BUF_LEN - 1;
    size_t i;

#ifdef DEBUG
    /* Print the user pointer itself; it must not be dereferenced with %s. */
    printk(KERN_INFO "device_write(%p,%p,%zu)\n", file, buffer, length);
#endif

    for (i = 0; i < limit; i++) {
        char ch;

        /* get_user() returns non-zero when the source is not readable. */
        if (get_user(ch, buffer + i))
            break;
        Message[i] = ch;
    }

    Message[i] = '\0';
    Message_Ptr = Message;

    /* Stopped early on a fault before copying anything. */
    if (i < limit && i == 0)
        return -EFAULT;

    /* Again, return the number of input characters used. */
    return i;
}
/*
* This function is called whenever a process tries to do an ioctl on our
* device file. We get two extra parameters (additional to the inode and file
* structures, which all device functions get): the number of the ioctl called
* and the parameter given to the ioctl function.
*
* If the ioctl is write or read/write (meaning output is returned to the
* calling process), the ioctl call returns the output of this function.
*
*/
int device_ioctl(struct inode *inode, /* see include/linux/fs.h */
struct file *file, /* ditto */
unsigned int ioctl_num, /* number and param for ioctl */
unsigned long ioctl_param)
{
int i;
char *temp;
char ch;
/*
* Switch according to the ioctl called
*/
switch (ioctl_num) {
case IOCTL_SET_MSG:
/*
* Receive a pointer to a message (in user space) and set that
* to be the device's message. Get the parameter given to
* ioctl by the process.
*/
temp = (char *)ioctl_param;
/*
* Find the length of the message
*/
get_user(ch, temp);
for (i = 0; ch && i < BUF_LEN; i++, temp++)
get_user(ch, temp);
device_write(file, (char *)ioctl_param, i, 0);
break;
case IOCTL_GET_MSG:
/*
* Give the current message to the calling process -
* the parameter we got is a pointer, fill it.
*/
i = device_read(file, (char *)ioctl_param, 99, 0);
/*
* Put a zero at the end of the buffer, so it will be
* properly terminated
*/
put_user('\0', (char *)ioctl_param + i);
break;
case IOCTL_GET_NTH_BYTE:
/*
* This ioctl is both input (ioctl_param) and
* output (the return value of this function)
*/
return Message[ioctl_param];
break;
}
return SUCCESS;
}
/* Module Declarations */
/*
 * Table of the functions the kernel calls when a process does something
 * to the device we created. register_chrdev() is handed a pointer to
 * this structure, so it has to outlive init_module() and therefore
 * cannot be a local variable there. Members that are not listed stay
 * NULL, meaning "not implemented".
 */
struct file_operations Fops = {
    .open = device_open,
    .release = device_release,  /* a.k.a. close */
    .read = device_read,
    .write = device_write,
    .ioctl = device_ioctl,
};
/*
 * Module entry point: registers the character device under the fixed
 * major number MAJOR_NUM and logs how to create the matching device
 * file. Returns 0 on success or the negative error code reported by
 * register_chrdev().
 */
int init_module()
{
    /* Register the character device (at least try). */
    const int rc = register_chrdev(MAJOR_NUM, DEVICE_NAME, &Fops);

    /* Non-negative means the registration went through. */
    if (rc >= 0) {
        printk(KERN_INFO "%s The major device number is %d.\n",
               "Registeration is a success", MAJOR_NUM);
        printk(KERN_INFO "If you want to talk to the device driver,\n");
        printk(KERN_INFO "you'll have to create a device file. \n");
        printk(KERN_INFO "We suggest you use:\n");
        printk(KERN_INFO "mknod %s c %d 0\n", DEVICE_FILE_NAME, MAJOR_NUM);
        printk(KERN_INFO "The device file name is important, because\n");
        printk(KERN_INFO "the ioctl program assumes that's the\n");
        printk(KERN_INFO "file you'll use.\n");
        return 0;
    }

    /* Negative values signify an error; pass it on to the loader. */
    printk(KERN_ALERT "%s failed with %d\n",
           "Sorry, registering the character device ", rc);
    return rc;
}
/*
* Cleanup - unregister the appropriate file from /proc
*/
void cleanup_module()
{
int ret;
/*
* Unregister the device
*/
ret = unregister_chrdev(MAJOR_NUM, DEVICE_NAME);
/*
* If there's an error, report it
*/
if (ret < 0)
printk(KERN_ALERT "Error: unregister_chrdev: %d\n", ret);
}
例子7-2.chardev.h
/*
 * chardev.h - the header file with the ioctl definitions.
 *
 * The declarations here have to be in a header file, because
 * they need to be known both to the kernel module
 * (in chardev.c) and the process calling ioctl (ioctl.c)
 */
#ifndef CHARDEV_H
#define CHARDEV_H
#include <linux/ioctl.h>
/*
 * The major device number. It is fixed here rather than obtained by
 * dynamic registration because the ioctl command numbers below are
 * built from it and must be known at compile time.
 */
#define MAJOR_NUM 100
/*
 * Set the message of the device driver.
 *
 * The ioctl argument is a pointer to a zero-terminated string in the
 * calling process; the module copies it into its own message buffer.
 */
#define IOCTL_SET_MSG _IOR(MAJOR_NUM, 0, char *)
/*
 * How the command number above is built:
 *
 * The first argument, MAJOR_NUM, is the major device number we're
 * using.
 *
 * The second argument is the number of the command (there could be
 * several with different meanings).
 *
 * The third argument is the type of the data exchanged between the
 * process and the kernel.
 *
 * NOTE(review): <linux/ioctl.h> names the direction from the user's
 * point of view, so _IOR normally marks data flowing from the kernel to
 * the process and _IOW data flowing from the process to the kernel.
 * IOCTL_SET_MSG passes data *to* the module yet is declared with _IOR.
 * Switching it to _IOW would change the command number, so confirm that
 * every user of this header is rebuilt before doing so.
 */
/*
 * Get the message of the device driver.
 *
 * This ioctl is used for output, to fetch the module's message.
 * The argument is still a pointer supplied by the process, because the
 * buffer the message is written into is allocated by the caller.
 */
#define IOCTL_GET_MSG _IOR(MAJOR_NUM, 1, char *)
/*
 * Get the n'th byte of the message.
 *
 * This ioctl is used for both input and output: it receives a number n
 * from the user and returns Message[n] as the ioctl's return value.
 */
#define IOCTL_GET_NTH_BYTE _IOWR(MAJOR_NUM, 2, int)
/*
 * The name of the device file. The user program opens this path, and
 * the module prints it in its suggested mknod command.
 */
#define DEVICE_FILE_NAME "char_dev"
#endif
例子7-3.ioctl.c
/*
* ioctl.c - the process to use ioctl's to control the kernel module
*
* Until now we could have used cat for input and output. But now
* we need to do ioctl's, which require writing our own process.
*/
/*
* device specifics, such as ioctl numbers and the
* major device file.
*/
#include "chardev.h"
#include <stdio.h>
#include <stdlib.h>
#include <fcntl.h> /* open */
#include <unistd.h> /* exit */
#include <sys/ioctl.h> /* ioctl */
/*
 * Functions for the ioctl calls
 */
/*
 * Ask the driver to replace its message with `message`.
 *
 * file_desc - descriptor of the opened device file
 * message   - zero-terminated string to hand to the driver
 *
 * Returns the (non-negative) result of the ioctl. If the ioctl fails,
 * the failure is printed and the process exits with status -1.
 */
int ioctl_set_msg(int file_desc, char *message)
{
    int ret_val = ioctl(file_desc, IOCTL_SET_MSG, message);

    if (ret_val < 0) {
        printf("ioctl_set_msg failed:%d\n", ret_val);
        exit(-1);
    }

    return ret_val;
}
/*
 * Fetch the driver's current message and print it.
 *
 * file_desc - descriptor of the opened device file
 *
 * Returns the (non-negative) result of the ioctl. If the ioctl fails,
 * the failure is printed and the process exits with status -1.
 */
int ioctl_get_msg(int file_desc)
{
    int ret_val;
    char message[100];

    /*
     * Warning - this is dangerous because we don't tell
     * the kernel how far it's allowed to write, so it
     * might overflow the buffer. In a real production
     * program, we would have used two ioctls - one to tell
     * the kernel the buffer length and another to give
     * it the buffer to fill
     */
    ret_val = ioctl(file_desc, IOCTL_GET_MSG, message);
    if (ret_val < 0) {
        printf("ioctl_get_msg failed:%d\n", ret_val);
        exit(-1);
    }

    printf("get_msg message:%s\n", message);
    return ret_val;
}
/*
 * Print the driver's message by fetching it one byte at a time with
 * IOCTL_GET_NTH_BYTE, stopping at the terminating zero byte.
 *
 * file_desc - descriptor of the opened device file
 *
 * Returns 0 once the whole message has been printed. If an ioctl
 * fails, the index of the failing byte is printed and the process
 * exits with status -1.
 */
int ioctl_get_nth_byte(int file_desc)
{
    int i;

    printf("get_nth_byte message:");

    for (i = 0;; i++) {
        /*
         * Keep the result in an int: squeezing it into a char would
         * make the error check depend on whether char is signed.
         */
        int c = ioctl(file_desc, IOCTL_GET_NTH_BYTE, i);

        if (c < 0) {
            printf("ioctl_get_nth_byte failed at the %d'th byte:\n", i);
            exit(-1);
        }

        /* A zero byte marks the end of the message; don't print it. */
        if (c == 0)
            break;

        putchar(c);
    }

    putchar('\n');
    return 0;
}
/*
 * Main - Call the ioctl functions
 *
 * Opens the device file DEVICE_FILE_NAME, reads the current message
 * byte by byte and then in one piece, and finally sets a new message.
 * Returns 0 on success; exits with status -1 if the device file cannot
 * be opened (the helpers exit the same way when an ioctl fails).
 */
int main(void)
{
    int file_desc;
    char *msg = "Message passed by ioctl\n";

    file_desc = open(DEVICE_FILE_NAME, O_RDONLY);
    if (file_desc < 0) {
        printf("Can't open device file: %s\n", DEVICE_FILE_NAME);
        exit(-1);
    }

    ioctl_get_nth_byte(file_desc);
    ioctl_get_msg(file_desc);
    ioctl_set_msg(file_desc, msg);

    close(file_desc);
    return 0;
}
第八章 系統調用
8.1 系統調用
到目前為止,我們唯一做的事情就是使用已經定義好的核心機制來注冊 /proc 檔案和裝置處理方法。如果你想做的是核心開發者預料到你會做的事情,例如寫一個裝置驅動,這就夠了。但是如果你想要做點不尋常的事情,比如用某種方法改變系統的行為呢?那麼,你基本上就得靠自己了。
這也就是核心程式設計變得危險的地方。在寫下面的例子的時候,我殺掉了 open() 系統調用。這意味着我不能打開任何檔案,不能運作任何程式,也不能關掉計算機,只好拔掉電源。幸運的是,沒有檔案損壞。為了保證你也不會丢失檔案,請在 insmod 和 rmmod 之前運作 sync。
忘記 /proc 檔案,忘記裝置檔案。他們隻是小細節。程序與核心交流的真正機制(所有程序都使用的那個)是系統調用。當一個程序向核心請求服務的時候(例如打開一個檔案,fork一個新的程序,或者請求更多的記憶體),使用的就是這個機制。如果你想要用有趣的方法改變核心的行為,這裡就是下手的地方。順便說一下,如果你想要看到一個程式使用了哪些系統調用,運作 strace <arguments>。
通常來說,一個程序不應該能夠存取核心。它不能存取核心記憶體,也不能調用核心函數。這是由CPU硬體強制保證的(這就是它被稱為「保護模式」的原因)。
系統調用是這個規則的一個例外。具體過程是:程序把恰當的值填入暫存器,然後調用一條特殊的指令,這條指令會跳到核心中一個預先定義好的位置(當然,這個位置對使用者程序是可讀的,但不可被使用者程序寫入)。在Intel CPU上,這是通過0x80中斷完成的。硬體知道一旦你跳到了那個位置,你就不再運作在受限制的使用者模式,而是作為作業系統的核心在運作——所以你就被允許做任何你想做的事情了。
核心中程序可以跳轉到的那個位置叫做 system_call。在那個位置的程式檢查系統調用的号碼,這個号碼告訴核心這個程序請求了什麼服務。然後,它會查系統調用表(sys_call_table)來找到要調用的核心函數的位址。接下來,它調用該函數,在函數傳回之後做一些系統檢查,再傳回到程序(或者,如果程序的時間片用完了,就傳回到别的程序)。如果你想要讀這段代碼,它在源檔案 arch/<architecture>/kernel/entry.S 中,位于 ENTRY(system_call) 這一行的後面。
所以,如果我們想要改變某一個系統調用的工作方式,我們需要做的就是寫我們自己的函數來實作它(通常是加入一點我們自己的代碼,然後調用原始的函數),然後把 sys_call_table 中的指針改為指向我們自己的函數。因為我們可能稍後會被移除,而我們不想把系統留在一個不穩定的狀态,所以 cleanup_module 把這個表恢複到原來的狀态是非常重要的。
這裡的源碼就是這樣一個核心模組的例子。我們想要「監視」('spy')一個特定的使用者,無論何時這個使用者打開一個檔案,就用 printk() 輸出一個消息。為此,我們用我們自己的函數取代打開檔案的系統調用,這個函數名叫 our_sys_open。這個函數檢查目前程序的 uid(user’s id),如果它等于我們監視的 uid,就調用 printk() 來顯示要打開的檔案的名字。然後不管怎樣,它都會用同樣的參數調用原始的 open() 函數,來真正地打開檔案。
init_module 函數替換 sys_call_table 中對應的位置,并把原始的指針保存在一個變量中。cleanup_module 函數使用那個變量來把所有的事情恢複到正常狀态。這個方法很危險,因為有可能兩個核心模組改變了同一個系統調用。想想看,我們有兩個核心模組,A和B。A的 open 系統調用是 A_open,B的是 B_open。現在,當A插入到核心時,系統調用被 A_open 取代了,它做完自己的事後會調用原有的 sys_open。接下來,B插入到核心,用 B_open 取代了系統調用,而 B_open 做完後會調用它所認為的原有系統調用:A_open。
現在,如果B首先被移除,那麼一切安好——它隻是簡單地把系統調用恢複為 A_open,而 A_open 會調用原有的系統調用。然而,如果A先被移除,B再被移除,那麼系統就會當機。A的移除會把系統調用恢複為原有的 sys_open,把B排除在調用鍊之外。然後當B被移除時,它會把系統調用恢複為它所認為的原始系統調用 A_open,然而 A_open 已經不在記憶體裡面了。乍一看,好像我們可以這樣解決這個問題:檢查系統調用是否等于我們自己的 open 函數,如果不等于就完全不去改動它(這樣B在被移除的時候就不會改變系統調用),但是那會導緻一個更糟糕的問題。當A被移除時,它看到系統調用已經被改成了 B_open,不再指向 A_open,于是它在從記憶體移除之前不會把系統調用恢複為 sys_open。不幸的是,B_open 還是會嘗試調用 A_open,即使 A_open 已經不在了。這樣,甚至不需要移除B,系統就會當機。
注意,所有這些相關的問題使得「偷用」系統調用(syscall stealing)在生産用途上不可行。為了防止人們做出有潛在危害的事情,sys_call_table 不再被導出了。這就意味着,如果你想要做的不僅僅是空跑一下這個例子,你需要給你目前的核心打更新檔,讓 sys_call_table 重新被導出。在例子的目錄下,你會找到一個README和更新檔。你可以想象,這種修改不可等閒視之。不要在有價值的系統上嘗試(例如,不屬于你的系統,或者不能輕易恢複的系統)。你需要取得這份指南完整源代碼的打包檔(tarball),才能得到更新檔和README。而且取決于你核心的版本,你可能甚至需要手動應用這個更新檔。
例子8-1.syscall.c
/*
* syscall.c
*
* System call "stealing" sample.
*/
/*
* Copyright (C) 2001 by Peter Jay Salzman
*/
/*
* The necessary header files
*/
/*
* Standard in kernel modules
*/
#include <linux/kernel.h> /* We're doing kernel work */
#include <linux/module.h> /* Specifically, a module, */
#include <linux/moduleparam.h> /* which will have params */
#include <linux/unistd.h> /* The list of system calls */
/*
* For the current (process) structure, we need
* this to know who the current user is.
*/
#include <linux/sched.h>
#include <asm/uaccess.h>
/*
 * The system call table (a table of function pointers). It is only
 * declared here as external; the symbol has to be resolved against the
 * running kernel when the module is insmod'ed.
 *
 * sys_call_table is no longer exported in 2.6.x kernels.
 * If you really want to try this DANGEROUS module you will
 * have to apply the supplied patch against your current kernel
 * and recompile it.
 */
extern void *sys_call_table[];
/*
 * UID we want to spy on - filled in from the insmod command line
 * through the module parameter of the same name.
 */
static int uid;
module_param(uid, int, 0644);
/*
 * A pointer to the original system call, saved by init_module() and
 * put back by cleanup_module(). The reason we keep this, rather than
 * call the original function (sys_open) directly, is that somebody
 * else might have replaced the system call before us. Note that this
 * is not 100% safe: if another module replaced sys_open before us,
 * then once we are inserted we call the function in that module - and
 * that module might be removed before we are.
 *
 * Another reason for this is that we can't get sys_open.
 * It's a static variable, so it is not exported.
 */
asmlinkage int (*original_call) (const char *, int, int);
/*
 * The function we'll replace sys_open (the function called when you
 * call the open system call) with. To find the exact prototype, with
 * the number and type of arguments, we find the original function first
 * (it's at fs/open.c).
 *
 * In theory, this means that we're tied to the current version of the
 * kernel. In practice, the system calls almost never change (it would
 * wreak havoc and require programs to be recompiled, since the system
 * calls are the interface between the kernel and the processes).
 *
 * filename - user-space pointer to the path being opened
 * flags    - open flags, passed through untouched
 * mode     - creation mode, passed through untouched
 *
 * If the calling process runs under the UID we are spying on, the file
 * name is logged first. In every case the saved original system call is
 * invoked with the same arguments and its result is returned.
 */
asmlinkage int our_sys_open(const char *filename, int flags, int mode)
{
    int i;
    char ch;

    /* Check if this is the user we're spying on. */
    if (uid == current->uid) {
        /* Report the file, one character at a time. */
        printk("Opened file by %d: ", uid);

        /*
         * The name lives in user space, so each byte is fetched with
         * get_user(). Stop at the terminating zero byte (without
         * printing it) or as soon as get_user() reports a fault.
         */
        for (i = 0; !get_user(ch, filename + i) && ch != 0; i++)
            printk("%c", ch);

        printk("\n");
    }

    /*
     * Call the original sys_open - otherwise, we lose the ability to
     * open files.
     */
    return original_call(filename, flags, mode);
}
/*
 * Module entry point: logs a series of warnings, remembers the current
 * handler of the open system call in original_call, and installs
 * our_sys_open in its place. Always returns 0.
 */
int init_module()
{
    /*
     * The table entry for open. To get at the function for system
     * call foo, look at sys_call_table[__NR_foo].
     */
    void **open_slot = &sys_call_table[__NR_open];

    /* Warning - too late for it now, but maybe for next time... */
    printk(KERN_ALERT "I'm dangerous. I hope you did a ");
    printk(KERN_ALERT "sync before you insmod'ed me.\n");
    printk(KERN_ALERT "My counterpart, cleanup_module(), is even");
    printk(KERN_ALERT "more dangerous. If\n");
    printk(KERN_ALERT "you value your file system, it will ");
    printk(KERN_ALERT "be \"sync; rmmod\" \n");
    printk(KERN_ALERT "when you remove this module.\n");

    /* Save whatever is installed right now, then hook ourselves in. */
    original_call = *open_slot;
    *open_slot = our_sys_open;

    printk(KERN_INFO "Spying on UID:%d\n", uid);
    return 0;
}
/*
 * Module exit point: puts the saved original handler back into the
 * system call table entry for open. If that entry no longer points at
 * our_sys_open, a warning is logged first; the entry is overwritten
 * with original_call either way.
 */
void cleanup_module()
{
    void **open_slot = &sys_call_table[__NR_open];

    /* Somebody replaced the entry after we did - warn about it. */
    if (*open_slot != our_sys_open) {
        printk(KERN_ALERT "Somebody else also played with the ");
        printk(KERN_ALERT "open system call\n");
        printk(KERN_ALERT "The system may be left in ");
        printk(KERN_ALERT "an unstable state.\n");
    }

    /* Return the system call back to normal. */
    *open_slot = original_call;
}