天天看点

基于Nand Flash的UBIFS多任务读写错误

在Nand Flash上创建UBIFS格式的文件系统,用于存储系统的历史数据。

在实际调试过程中,发现UBIFS文件系统会产生错误,输出信息如下:

[685108.022234] nand_erase_nand: attempt to erase a bad block at page 0x00008580
[685108.050087] nand_erase_nand: attempt to erase a bad block at page 0x00008580
[685108.070232] nand_erase_nand: attempt to erase a bad block at page 0x00008580
[685108.090388] nand_erase_nand: attempt to erase a bad block at page 0x00008580
[685108.097944] UBI error: do_sync_erase: cannot erase PEB 534, error -5
[685108.104750] UBI error: erase_worker: failed to erase PEB 534, error -5
[685108.111725] UBI: mark PEB 534 as bad
[685108.115647] UBI: 17 PEBs left in the reserve
[687322.014321] UBI: scrubbed PEB 1272 (LEB 0:1453), data moved to PEB 1855
[687774.705119] UBI error: ubi_io_read: error -74 (ECC error) while reading 126976 bytes from PEB 456:4096, read 126976 bytes
[687774.733301] UBIFS error (pid 10861): ubifs_check_node: bad CRC: calculated 0x7845a4da, read 0x907d6ad0
[687774.743302] UBIFS error (pid 10861): ubifs_check_node: bad node at LEB 1769:113376
[687774.751381] 	magic          0x6101831
[687774.751389] 	crc            0x907d6ad0
[687774.751398] 	node_type      1 (data node)
[687774.751407] 	group_type     0 (no node group)
[687774.751417] 	sqnum          64559590
[687774.751424] 	len            1667
[687774.751438] 	key            (24037, data, 1463)
[687774.751448] 	size           4096
[687774.751455] 	compr_typ      1
[687774.751463] 	data size      1619
[687774.751469] 	data:
[687774.751486] 	00000000: 0a 61 61 39 0a 30 30 31 65 37 61 61 63 0a c1 01 66 c2 01 62 32 e1 01 35 e1 01 38 e1 01 62 e1 01

[687774.942198] UBIFS error (pid 10861): ubifs_scan: bad node
[687774.947989] UBIFS error (pid 10861): ubifs_scanned_corruption: corruption at LEB 1769:113376
[687774.956997] UBIFS error (pid 10861): ubifs_scanned_corruption: first 8192 bytes from LEB 1769:113376
[687774.966778] 00000000: 06101831 907d6ad0 03d919e6 00000000 00000683 00000001 00005de5 200005b7  1....j}..................]..... 

[687774.973358] UBIFS error (pid 10861): ubifs_scan: LEB 1769 scanning failed
[687774.980645] UBIFS warning (pid 10861): ubifs_ro_mode: switched to read-only mode, error -117

[687775.162137] UBIFS error (pid 10861): ubifs_budget_space: cannot budget space, error -117
[687804.722203] UBIFS error (pid 11722): make_reservation: cannot reserve 160 bytes in jhead 1, error -30
[687804.732084] UBIFS error (pid 11722): ubifs_write_inode: can't write inode 74, error -30
           

初步分析为Nand Flash数据写入错误。为此编写了两个测试程序,同时运行,对nand flash进行多任务的数据读写操作,代码如下:

nand_test.c

/*
 * nand_test.c - endless write / read-back / delete stress test for a
 * flash-backed file system mounted at /data.
 *
 * Each pass writes FILE_COUNT files of LINES_PER_FILE fixed-width lines,
 * reads every file back and compares it with the expected pattern, then
 * deletes the files.  A pass is reported as successful only if every
 * step of it succeeded.
 */

/* fsync() and fileno() are POSIX, not ISO C; request their declarations. */
#define _POSIX_C_SOURCE 200809L

#include <stdlib.h>
#include <sys/types.h>
#include <unistd.h>
#include <fcntl.h>
#include <stdio.h>
#include <string.h>

enum
{
    FIRST_FILE     = 0,            /* number used in the first file name  */
    FILE_COUNT     = 40,           /* files written per pass              */
    LINES_PER_FILE = 1024 * 1024,  /* lines per file                      */
    LINE_LEN       = 9             /* 8 hex digits + '\n', no terminator  */
};

/* Build the path of the k-th test file (k is 0-based within this program). */
static void make_path(char *path, size_t size, int k)
{
    snprintf(path, size, "/data/test%d.dat", FIRST_FILE + k);
}

/* Format line j of file k: the product k*j as 8 hex digits plus newline. */
static void make_line(char *line, size_t size, int k, int j)
{
    snprintf(line, size, "%08x\n", (unsigned int)(k * j));
}

/*
 * Write the complete pattern for file k and force it to storage.
 * Returns 0 on success, -1 if any step (open, write, flush, sync, close)
 * failed.
 */
static int write_file(int k)
{
    char path[64];
    char line[LINE_LEN + 1];
    FILE *fp;
    int j;
    int rc = 0;

    make_path(path, sizeof path, k);
    fp = fopen(path, "wb");
    if (!fp)
    {
        printf("Cannot open %s\n", path);
        return -1;
    }

    for (j = 0; j < LINES_PER_FILE; j++)
    {
        make_line(line, sizeof line, k, j);
        if (fwrite(line, LINE_LEN, 1, fp) != 1)
        {
            printf("Write value to file:%s failed \n", path);
            rc = -1;
            break;
        }
    }

    /*
     * stdio buffers the data, so a failing device is usually reported
     * here rather than by fwrite(); these results must not be ignored.
     */
    if (fflush(fp) != 0 || fsync(fileno(fp)) != 0)
    {
        printf("Sync file:%s failed \n", path);
        rc = -1;
    }
    if (fclose(fp) != 0)
    {
        printf("Close file:%s failed \n", path);
        rc = -1;
    }

    return rc;
}

/*
 * Read file k back and compare every line with the expected pattern.
 * Every mismatching line is reported.  Returns 0 if the whole file
 * matched, -1 on open failure, short read or any mismatch.
 */
static int verify_file(int k)
{
    char path[64];
    char line[LINE_LEN + 1];
    char expect[LINE_LEN + 1];
    FILE *fp;
    int j;
    int rc = 0;

    make_path(path, sizeof path, k);
    fp = fopen(path, "rb");   /* verification only reads */
    if (!fp)
    {
        printf("Cannot open %s\n", path);
        return -1;
    }

    for (j = 0; j < LINES_PER_FILE; j++)
    {
        if (fread(line, LINE_LEN, 1, fp) != 1)
        {
            printf("read  file:%s failed \n", path);
            rc = -1;
            break;
        }
        make_line(expect, sizeof expect, k, j);
        if (memcmp(line, expect, LINE_LEN) != 0)
        {
            printf("check  file:%s line %d failed \n", path, j);
            rc = -1;
        }
    }

    fclose(fp);
    return rc;
}

/* Delete file k. Returns 0 on success, -1 if the file could not be removed. */
static int remove_file(int k)
{
    char path[64];

    make_path(path, sizeof path, k);
    if (remove(path) != 0)
    {
        printf("Cannot remove %s\n", path);
        return -1;
    }
    return 0;
}

/*
 * Run write / verify / delete passes forever.  A failure in one file does
 * not stop the pass: the remaining files are still exercised and all files
 * are always cleaned up, but the pass is then reported as failed.
 */
int main(void)
{
    while (1)
    {
        int failed = 0;
        int k;

        for (k = 0; k < FILE_COUNT; k++)
        {
            if (write_file(k) != 0)
            {
                failed = 1;
            }
        }

        for (k = 0; k < FILE_COUNT; k++)
        {
            if (verify_file(k) != 0)
            {
                failed = 1;
            }
        }

        for (k = FILE_COUNT - 1; k >= 0; k--)
        {
            if (remove_file(k) != 0)
            {
                failed = 1;
            }
        }

        if (failed)
        {
            printf("nand test failed!~  \n");
        }
        else
        {
            printf("nand test success!~  \n");
        }
    }

    return 0;
}
           

nand_test1.c

/*
 * nand_test1.c - second endless write / read-back / delete stress test
 * for a flash-backed file system mounted at /data.
 *
 * Each pass writes FILE_COUNT files (named test40.dat .. test49.dat) of
 * LINES_PER_FILE fixed-width lines, reads every file back and compares it
 * with the expected pattern, then deletes the files.  A pass is reported
 * as successful only if every step of it succeeded.
 */

/* fsync() and fileno() are POSIX, not ISO C; request their declarations. */
#define _POSIX_C_SOURCE 200809L

#include <stdlib.h>
#include <sys/types.h>
#include <unistd.h>
#include <fcntl.h>
#include <stdio.h>
#include <string.h>

enum
{
    FIRST_FILE     = 40,           /* number used in the first file name  */
    FILE_COUNT     = 10,           /* files written per pass              */
    LINES_PER_FILE = 1024 * 1024,  /* lines per file                      */
    LINE_LEN       = 9             /* 8 hex digits + '\n', no terminator  */
};

/* Build the path of the k-th test file (k is 0-based within this program). */
static void make_path(char *path, size_t size, int k)
{
    snprintf(path, size, "/data/test%d.dat", FIRST_FILE + k);
}

/*
 * Format line j of file k: the product k*j as 8 hex digits plus newline.
 * k is the 0-based index, not the number that appears in the file name.
 */
static void make_line(char *line, size_t size, int k, int j)
{
    snprintf(line, size, "%08x\n", (unsigned int)(k * j));
}

/*
 * Write the complete pattern for file k and force it to storage.
 * Returns 0 on success, -1 if any step (open, write, flush, sync, close)
 * failed.
 */
static int write_file(int k)
{
    char path[64];
    char line[LINE_LEN + 1];
    FILE *fp;
    int j;
    int rc = 0;

    make_path(path, sizeof path, k);
    fp = fopen(path, "wb");
    if (!fp)
    {
        printf("Cannot open %s\n", path);
        return -1;
    }

    for (j = 0; j < LINES_PER_FILE; j++)
    {
        make_line(line, sizeof line, k, j);
        if (fwrite(line, LINE_LEN, 1, fp) != 1)
        {
            printf("Write value to file:%s failed \n", path);
            rc = -1;
            break;
        }
    }

    /*
     * stdio buffers the data, so a failing device is usually reported
     * here rather than by fwrite(); these results must not be ignored.
     */
    if (fflush(fp) != 0 || fsync(fileno(fp)) != 0)
    {
        printf("Sync file:%s failed \n", path);
        rc = -1;
    }
    if (fclose(fp) != 0)
    {
        printf("Close file:%s failed \n", path);
        rc = -1;
    }

    return rc;
}

/*
 * Read file k back and compare every line with the expected pattern.
 * Every mismatching line is reported.  Returns 0 if the whole file
 * matched, -1 on open failure, short read or any mismatch.
 */
static int verify_file(int k)
{
    char path[64];
    char line[LINE_LEN + 1];
    char expect[LINE_LEN + 1];
    FILE *fp;
    int j;
    int rc = 0;

    make_path(path, sizeof path, k);
    fp = fopen(path, "rb");   /* verification only reads */
    if (!fp)
    {
        printf("Cannot open %s\n", path);
        return -1;
    }

    for (j = 0; j < LINES_PER_FILE; j++)
    {
        if (fread(line, LINE_LEN, 1, fp) != 1)
        {
            printf("read  file:%s failed \n", path);
            rc = -1;
            break;
        }
        make_line(expect, sizeof expect, k, j);
        if (memcmp(line, expect, LINE_LEN) != 0)
        {
            printf("check  file:%s line %d failed \n", path, j);
            rc = -1;
        }
    }

    fclose(fp);
    return rc;
}

/* Delete file k. Returns 0 on success, -1 if the file could not be removed. */
static int remove_file(int k)
{
    char path[64];

    make_path(path, sizeof path, k);
    if (remove(path) != 0)
    {
        printf("Cannot remove %s\n", path);
        return -1;
    }
    return 0;
}

/*
 * Run write / verify / delete passes forever.  A failure in one file does
 * not stop the pass: the remaining files are still exercised and all files
 * are always cleaned up, but the pass is then reported as failed.
 */
int main(void)
{
    while (1)
    {
        int failed = 0;
        int k;

        for (k = 0; k < FILE_COUNT; k++)
        {
            if (write_file(k) != 0)
            {
                failed = 1;
            }
        }

        for (k = 0; k < FILE_COUNT; k++)
        {
            if (verify_file(k) != 0)
            {
                failed = 1;
            }
        }

        for (k = FILE_COUNT - 1; k >= 0; k--)
        {
            if (remove_file(k) != 0)
            {
                failed = 1;
            }
        }

        if (failed)
        {
            printf("nand test 1 failed!~  \n");
        }
        else
        {
            printf("nand test 1 success!~  \n");
        }
    }

    return 0;
}
           

测试发现,在多任务操作Nand Flash的过程中,会有比较大的概率导致nand flash错误。

在Linux Kernel的menuconfig中,配置选项 Device Drivers-> Memory Technology Device (MTD) support -> NAND Device Support->Verify NAND page writes 用来配置在nand flash写入时,是否进行额外的校验.

开启Verify NAND page writes后,执行测试程序,输出信息如下:

[270907.433837] UBI error: ubi_io_write: error -5 while writing 2048 bytes to PEB 976:106496, written 0 bytes
[270907.444130] UBI warning: ubi_eba_write_leb: failed to write data to PEB 976
[270907.451584] UBI: recover PEB 976, move data to PEB 1456
[270907.650083] UBI: data was successfully recovered
[270907.656046] UBI: run torture test for PEB 976
[270908.300205] UBI: PEB 976 passed torture test, do not mark it as bad
[271145.827524] UBI error: ubi_io_write: error -5 while writing 2048 bytes to PEB 1048:57344, written 0 bytes
[271145.837783] UBI warning: ubi_eba_write_leb: failed to write data to PEB 1048
[271145.845346] UBI: recover PEB 1048, move data to PEB 1457
[271145.963098] UBI: data was successfully recovered
[271145.968262] UBI: run torture test for PEB 1048
[271146.740653] UBI: PEB 1048 passed torture test, do not mark it as bad
           

分析认为,开启Verify NAND page writes后,写入错误能够在写入时被及时发现并由UBI进行恢复,从而有效减少nand flash出错导致的程序异常。

在长期稳定性测试中发现,nand flash总是有可能发生读写错误,导致分区被切换为只读状态。由于在实际应用中,nand flash分区存储的是历史数据信息,所以希望在nand flash发生读写错误后,分区不被切换为只读状态。参照https://e2e.ti.com/support/embedded/linux/f/354/t/171839的内容,修改如下(注意:该修改使扫描代码在报告“corrupt empty space”后不再跳转到corrupted错误处理分支,而是继续返回扫描结果):

===================================================================
--- fs/ubifs/scan.c (revision 1897)
+++ fs/ubifs/scan.c (working copy)
@@ -339,7 +339,7 @@
 if (!quiet)
 ubifs_err("corrupt empty space at LEB %d:%d",
 lnum, offs);
- goto corrupted;
+ //goto corrupted;
 }
 
 return sleb;
           

关于nand flash在am335x系列CPU上使用出现的这个问题,ti官方也没有太好的解决方案,具体可以参照ti的官方论坛答复。

https://e2e.ti.com/support/embedded/linux/f/354/t/171839

继续阅读