import re
import os
# Collect per-sample read statistics from every Cutadapt report found under
# the current directory and write them to one tab-separated summary file.
#
# Output columns, in order: sample name, total reads, reads with adapters,
# reads that were too short, reads passing filters.

# Name of the summary file; it is created in the current working directory.
newfile_name = 'Cutadapt_stat.txt'

# The context managers guarantee that both the summary file and every report
# are closed, even if a report turns out to be malformed and parsing raises.
with open(newfile_name, 'w') as newfile:
    # Walk the current directory recursively.
    for root, dirs, files in os.walk(r"./"):
        for file in files:
            # Only files whose name contains the report marker are processed.
            if ".trimmed_Cutadapt.report" not in file:
                continue

            with open(os.path.join(root, file), 'r') as cutadapt_file:
                genome_line = cutadapt_file.readlines()

            # The statistics are read from report lines 8-11 (list indices
            # 7-10). A report with fewer than 11 lines raises IndexError.
            # Each line is split on single spaces to separate label and value.
            total_reads = genome_line[7].split(" ")
            # For the total-reads line the value is the last token; drop the
            # trailing newline that readlines() keeps.
            total_reads = total_reads[-1].replace('\n', '')
            # For the remaining lines the second-to-last token is used
            # (presumably the count that precedes a trailing percentage
            # field -- confirm against the Cutadapt version in use).
            total_adapters = genome_line[8].split(" ")
            too_short = genome_line[9].split(" ")
            Pass_filters = genome_line[10].split(" ")

            # Sample name = report file name with the marker removed.
            file_name = file.replace('.trimmed_Cutadapt.report', '')

            result_stat = "\t".join([
                file_name,
                total_reads,
                total_adapters[-2],
                too_short[-2],
                Pass_filters[-2],
            ]) + "\n"
            newfile.write(result_stat)