ファイルのメモリマッピング(mmap)

ファイルのメモリマッピングにより,head, tail コマンドを実装する例題です.

メモリマッピング(メモリマップドファイル: memory mapped file とも呼ばれる)はファイルの内容にアクセスする方法で,ファイルをアドレス空間にマップし,ファイル内容を配列のように扱うことができます.

head(list_82-1.c)
#include <stdio.h>
#include <stdlib.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/mman.h>
#include <sys/types.h>
#include <sys/stat.h>

//------------------------------------------------------------
void usage(char* cmd);
void myhead(char* infile,int n_output,int n_fold);
char *mmap_file(char *name,size_t *filesizep);
//------------------------------------------------------------

// Parses text as a non-negative decimal number that fits in an int.
// Returns 0 and stores the value in *out on success, -1 otherwise
// (*out is left untouched on failure).
static int parse_count(const char *text,int *out)
{
    char *end;
    long value = strtol(text,&end,10);

    // Reject empty input, trailing garbage, negative values and values
    // that do not survive the round trip through int.
    if(end == text || *end != '\0' || value < 0 || (long)(int)value != value)
        return -1;
    *out = (int)value;
    return 0;
}

// Entry point of the head example.
// Command line: in_file [-n number] [-f number]
//   -n number: number of lines to print (default 10)
//   -f number: fold width in bytes, 0 disables folding (default 0)
// Returns EXIT_FAILURE when the command line is unusable, 0 otherwise.
int main(int argc,char* argv[])
{
    int i;
    int n_output=10,n_fold=0;
    char *in_file=NULL;
    // read argument
    for(i=1;i<argc;i++){
        if(argv[i][0] == '-'){
            switch(argv[i][1]){
            case 'n':
                // argv[argc] is NULL, so a missing value shows up as NULL here
                if(argv[++i] == NULL){
                    fprintf(stderr,"Output number err.\n");
                    usage(argv[0]);
                    return EXIT_FAILURE;
                }
                if(parse_count(argv[i],&n_output) != 0){
                    fprintf(stderr,"Invalid output number '%s'\n",argv[i]);
                    usage(argv[0]);
                    return EXIT_FAILURE;
                }
                break;
            case 'f':
                if(argv[++i] == NULL){
                    fprintf(stderr,"Fold number not err.\n");
                    usage(argv[0]);
                    return EXIT_FAILURE;
                }
                if(parse_count(argv[i],&n_fold) != 0){
                    fprintf(stderr,"Invalid fold number '%s'\n",argv[i]);
                    usage(argv[0]);
                    return EXIT_FAILURE;
                }
                break;
            default:
                fprintf(stderr,"Unknown option '%c'\n",argv[i][1]);
                usage(argv[0]);
                return EXIT_FAILURE;
            }
        } else {
            // the last non-option argument wins
            in_file = argv[i];
        }
    }
    if(in_file == NULL){
        usage(argv[0]);
        return EXIT_FAILURE;
    }

    myhead(in_file,n_output,n_fold);

    return 0;
}

// Prints the command-line synopsis to standard output.
// cmd: program name shown in the synopsis line.
void usage(char* cmd)
{
    // The option list never changes, so it is kept as one constant string.
    static const char option_help[] =
        "[option]:\n"
        "  -n number: set output line number\n"
        "  -f number: set fold number\n";

    printf("usage: %s in_file [option]\n",cmd);
    fputs(option_help,stdout);
}

//------------------------------------------------------------
// ファイル先頭部の表示
//------------------------------------------------------------
// Writes one byte to standard output (fd 1).
// Returns 0 on success, -1 when the byte could not be written.
static int put_byte(char byte)
{
    return (write(1,&byte,1) == 1) ? 0 : -1;
}

// Prints the first n_output display lines of infile to standard output.
//   infile:   path of the file to show
//   n_output: number of display lines to print; <= 0 prints nothing
//   n_fold:   when non-zero, a display line also ends after n_fold bytes
//             and a newline is inserted at that point
// BEL bytes ('\7') are not written but still count toward the fold width.
// If the file cannot be mapped, the reason is reported with perror().
// Output stops quietly as soon as a write to standard output fails.
void myhead(char* infile,int n_output,int n_fold)
{
    size_t size;
    size_t ri = 0;
    char *file_address;
    char c;
    char old_c = '\n';   // nothing printed yet: no closing newline is needed
    long n_byte = 0,n_line = 0;

    file_address = mmap_file(infile,&size);
    // mmap_file reports failure as (char *)-1; compare as a pointer so the
    // value is not truncated through int
    if(file_address == (char *)-1) {
        perror(infile);
        return;
    }

    // output head
    while(n_line < n_output && ri < size){
        c = file_address[ri++];
        old_c = c;
        if(c != '\7' && put_byte(c) != 0) goto done;
        if(c == '\n' || (n_fold && ++n_byte >= n_fold)){
            n_byte = 0; n_line++;
            if(c != '\n'){
                // line was cut by folding: supply the line break ourselves
                old_c = '\n';
                if(put_byte('\n') != 0) goto done;
            }
        }
    }
    // make sure the output ends with a newline
    if(old_c != '\n') put_byte('\n');
done:
    munmap(file_address,size);
}

//------------------------------------------------------------
// ファイルのメモリマッピング(memory mapped file)
//------------------------------------------------------------
// Maps the whole file `name` read-only into memory.
//   name:      path of the file to map
//   filesizep: when not NULL, receives the file size in bytes on success
//              (left untouched on failure)
// Returns the address of the mapping, or (char *)-1 if the file cannot be
// opened, inspected or mapped. A zero-length file is passed to mmap() with
// length 0, which POSIX mmap() rejects, so an empty file is reported as a
// failure as well.
// The descriptor is closed before returning; the mapping stays valid until
// the caller unmaps it.
char *mmap_file(char *name,size_t *filesizep)
{
    int fd;
    struct stat buf;
    size_t size;
    void *addr;

    if((fd = open(name,O_RDONLY)) == -1)
        return((char *)-1);
    // without a successful fstat() the size below would be indeterminate
    if(fstat(fd,&buf) == -1) {
        close(fd);
        return((char *)-1);
    }
    size = (size_t)buf.st_size;
    addr = mmap(NULL,size,PROT_READ,MAP_PRIVATE,fd,0);
    // compare against MAP_FAILED as a pointer; casting the address to int
    // would truncate it on 64-bit systems
    if(addr == MAP_FAILED) {
        close(fd);
        return((char *)-1);
    }
    close(fd);
    if(filesizep) *filesizep = size;
    return((char *)addr);
}











実行結果
Gami[2106]% ./myhead.exe myhead.c
#include <stdio.h>
#include <stdlib.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/mman.h>
#include <sys/types.h>
#include <sys/stat.h>

//------------------------------------------------------------
void usage(char* cmd);
Gami[2107]% ./myhead.exe myhead.exe -n 10 -f 30
MZ       栫  失       @

氈  室 雫      赦!失L赦!This program
 cannot be run in DOS mode.
$       PE  L J搬B   蔀
8             
  @                     `
     鴫                
                P   

Gami[2108]%

tail(list_82-2.c)
#include <stdio.h>
#include <stdlib.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/mman.h>
#include <sys/types.h>
#include <sys/stat.h>

//------------------------------------------------------------
void usage(char* cmd);
void mytail(char* infile,int n_output,int n_fold);
char *mmap_file(char *name,size_t *filesizep);
//------------------------------------------------------------

// Parses text as a non-negative decimal number that fits in an int.
// Returns 0 and stores the value in *out on success, -1 otherwise
// (*out is left untouched on failure).
static int parse_count(const char *text,int *out)
{
    char *end;
    long value = strtol(text,&end,10);

    // Reject empty input, trailing garbage, negative values and values
    // that do not survive the round trip through int.
    if(end == text || *end != '\0' || value < 0 || (long)(int)value != value)
        return -1;
    *out = (int)value;
    return 0;
}

// Entry point of the tail example.
// Command line: in_file [-n number] [-f number]
//   -n number: number of lines to print (default 10)
//   -f number: fold width in bytes, 0 disables folding (default 0)
// Returns EXIT_FAILURE when the command line is unusable, 0 otherwise.
int main(int argc,char* argv[])
{
    int i,n_output=10,n_fold=0;
    char *in_file=NULL;
    // read argument
    for(i=1;i<argc;i++){
        if(argv[i][0] == '-'){
            switch(argv[i][1]){
            case 'n':
                // argv[argc] is NULL, so a missing value shows up as NULL here
                if(argv[++i] == NULL){
                    fprintf(stderr,"Output number not err.\n");
                    usage(argv[0]);
                    return EXIT_FAILURE;
                }
                if(parse_count(argv[i],&n_output) != 0){
                    fprintf(stderr,"Invalid output number '%s'\n",argv[i]);
                    usage(argv[0]);
                    return EXIT_FAILURE;
                }
                break;
            case 'f':
                if(argv[++i] == NULL){
                    fprintf(stderr,"Fold number not err.\n");
                    usage(argv[0]);
                    return EXIT_FAILURE;
                }
                if(parse_count(argv[i],&n_fold) != 0){
                    fprintf(stderr,"Invalid fold number '%s'\n",argv[i]);
                    usage(argv[0]);
                    return EXIT_FAILURE;
                }
                break;
            default:
                fprintf(stderr,"Unknown option '%c'\n",argv[i][1]);
                usage(argv[0]);
                return EXIT_FAILURE;
            }
        } else {
            // the last non-option argument wins
            in_file = argv[i];
        }
    }
    if(in_file == NULL){
        usage(argv[0]);
        return EXIT_FAILURE;
    }

    mytail(in_file,n_output,n_fold);

    return 0;
}

// Prints the command-line synopsis to standard output.
// cmd: program name shown in the synopsis line.
void usage(char* cmd)
{
    // Fixed help lines, emitted in order after the synopsis.
    static const char *const option_lines[] = {
        "[option]:\n",
        "  -n number: set output line number\n",
        "  -f number: set fold number\n",
    };
    size_t k;

    printf("usage: %s in_file [option]\n",cmd);
    for(k = 0; k < sizeof option_lines / sizeof option_lines[0]; k++)
        fputs(option_lines[k],stdout);
}

//------------------------------------------------------------
// ファイル末尾部の表示
//------------------------------------------------------------
// Writes one byte to standard output (fd 1).
// Returns 0 on success, -1 when the byte could not be written.
static int put_byte(char byte)
{
    return (write(1,&byte,1) == 1) ? 0 : -1;
}

// Returns the offset in data[0..size) at which the last n_output display
// lines begin. A display line ends at '\n' or, when n_fold is non-zero,
// after n_fold bytes counted from the start of the physical line (the same
// rule the output loop in mytail applies). Returns size when nothing is to
// be shown and 0 when the data holds n_output display lines or fewer.
static size_t tail_start_offset(const char *data,size_t size,int n_output,int n_fold)
{
    size_t width,remaining,end;

    if(n_output <= 0) return size;
    // a negative width breaks after every byte in the output loop,
    // which is exactly what a width of 1 does
    width = (n_fold < 0) ? 1 : (size_t)n_fold;
    remaining = (size_t)n_output;
    end = size;   // one past the last byte of the physical line being examined

    while(end > 0){
        int has_newline = (data[end-1] == '\n');
        size_t content_end = has_newline ? end-1 : end;
        size_t start = content_end;
        size_t length,count;

        // walk back to the first byte of this physical line
        while(start > 0 && data[start-1] != '\n') start--;
        length = content_end - start;

        // number of display lines this physical line is printed as
        if(width == 0)
            count = 1;
        else if(has_newline)
            count = length/width + 1;
        else
            count = length/width + (length%width != 0);

        // every display line except the last one is exactly width bytes long
        if(count >= remaining)
            return start + (count - remaining)*width;
        remaining -= count;
        end = start;
    }
    return 0;
}

// Prints the last n_output display lines of infile to standard output.
//   infile:   path of the file to show
//   n_output: number of display lines to print; <= 0 prints nothing
//   n_fold:   when non-zero, a display line also ends after n_fold bytes
//             and a newline is inserted at that point
// BEL bytes ('\7') are not written but still count toward the fold width.
// If the file cannot be mapped, the reason is reported with perror().
// Output stops quietly as soon as a write to standard output fails.
void mytail(char* infile,int n_output,int n_fold)
{
    size_t size,ri;
    char *file_address;
    char c;
    char old_c = '\n';   // nothing printed yet: no closing newline is needed
    long n_byte = 0;

    file_address = mmap_file(infile,&size);
    // mmap_file reports failure as (char *)-1; compare as a pointer so the
    // value is not truncated through int
    if(file_address == (char *)-1) {
        perror(infile);
        return;
    }

    // seek tail
    ri = tail_start_offset(file_address,size,n_output,n_fold);

    // output tail
    while(ri < size){
        c = file_address[ri++];
        n_byte++; old_c = c;
        if(c != '\7' && put_byte(c) != 0) goto done;
        if(c == '\n' || (n_fold && n_byte >= n_fold)){
            n_byte = 0;
            if(c != '\n'){
                // line was cut by folding: supply the line break ourselves
                old_c = '\n';
                if(put_byte('\n') != 0) goto done;
            }
        }
    }
    // make sure the output ends with a newline
    if(old_c != '\n') put_byte('\n');
done:
    munmap(file_address,size);
}

//------------------------------------------------------------
// ファイルのメモリマッピング(memory mapped file)
//------------------------------------------------------------
// Maps the whole file `name` read-only into memory.
//   name:      path of the file to map
//   filesizep: when not NULL, receives the file size in bytes on success
//              (left untouched on failure)
// Returns the address of the mapping, or (char *)-1 if the file cannot be
// opened, inspected or mapped. A zero-length file is passed to mmap() with
// length 0, which POSIX mmap() rejects, so an empty file is reported as a
// failure as well.
// The descriptor is closed before returning; the mapping stays valid until
// the caller unmaps it.
char *mmap_file(char *name,size_t *filesizep)
{
    int fd;
    struct stat buf;
    size_t size;
    void *addr;

    if((fd = open(name,O_RDONLY)) == -1)
        return((char *)-1);
    // without a successful fstat() the size below would be indeterminate
    if(fstat(fd,&buf) == -1) {
        close(fd);
        return((char *)-1);
    }
    size = (size_t)buf.st_size;
    addr = mmap(NULL,size,PROT_READ,MAP_PRIVATE,fd,0);
    // compare against MAP_FAILED as a pointer; casting the address to int
    // would truncate it on 64-bit systems
    if(addr == MAP_FAILED) {
        close(fd);
        return((char *)-1);
    }
    close(fd);
    if(filesizep) *filesizep = size;
    return((char *)addr);
}

実行結果
Gami[2108]% ./mytail.exe mytail.c
        addr = 0; off = 0;
        addr = mmap(addr,size,PROT_READ,MAP_PRIVATE,fd,off);
        if((int)addr == -1) {
                close(fd);
                return((char *)-1);
        }
        close(fd);
        if(filesizep) *filesizep = size;
        return(addr);
}
Gami[2109]% ./mytail.exe mytail.exe -n 10 -f 30
process __imp__free __major_im
age_version__ __loader_flags__
 __imp__printf __imp__AddAtomA
@4 _pthread_atfork __head_libk
ernel32_a __minor_subsystem_ve
rsion__ __minor_image_version_
_ _FindAtomA@4 __imp___mmap64
_GetAtomNameA@12 __RUNTIME_PSE
UDO_RELOC_LIST_END__ __libkern
el32_a_iname ___crt_xt_end__
Gami[2110]%
inserted by FC2 system