天天看點

iOS中使用正規表達式去掉HTML中的標籤元素獲得純文字的方法

content是根據網址獲得的網頁源碼字元串

/// Strips HTML tags and line feeds from a chunk of page source and returns the remaining text.
///
/// Every match of `<[^>]*>` (anything between a pair of angle brackets) and every "\n" is
/// deleted; the text between matches is concatenated in its original order with no separator.
/// This is a regex strip, not an HTML parser: character entities such as `&amp;` are not
/// decoded, and the text inside `<script>` / `<style>` elements is left in the result.
///
/// @param content The HTML source to strip. May be nil or empty.
/// @return The text with tags and line feeds removed; an empty string when `content` is nil
///         or empty.
- (NSString *)changeToString:(NSString *)content
{
    // Messaging nil yields 0, so this one check covers both nil and @"".
    if (content.length == 0) {
        return @"";
    }

    NSError *error = nil;
    NSRegularExpression *tagOrNewline =
        [NSRegularExpression regularExpressionWithPattern:@"<[^>]*>|\n"
                                                  options:0
                                                    error:&error];
    if (tagOrNewline == nil) {
        // The pattern is a constant, so this is not expected to happen; hand the input back
        // untouched rather than messaging a nil regex and returning nil.
        return content;
    }

    // Delete the matches directly. The previous implementation substituted "-" as a
    // placeholder, then split on "-" and re-joined the pieces, which also removed every
    // literal hyphen that was part of the page text.
    return [tagOrNewline stringByReplacingMatchesInString:content
                                                  options:0
                                                    range:NSMakeRange(0, content.length)
                                             withTemplate:@""];
}
           

轉載于:https://www.cnblogs.com/iyou/p/4858655.html