IOS使用正则表达式去掉html中的标签元素,获得纯文本
content是根据网址获得的网页源码字符串
NSRegularExpression *regularExpretion=[NSRegularExpression regularExpressionWithPattern:@"<[^>]*>|\n"
options:0
error:nil];
content=[regularExpretion stringByReplacingMatchesInString:content options:NSMatchingReportProgress range:NSMakeRange(0, content.length) withTemplate:@"-"];//替换所有html和换行匹配元素为"-"
regularExpretion=[NSRegularExpression regularExpressionWithPattern:@"-{1,}" options:0 error:nil] ;
content=[regularExpretion stringByReplacingMatchesInString:content options:NSMatchingReportProgress range:NSMakeRange(0, content.length) withTemplate:@"-"];//把多个"-"匹配为一个"-"
//根据"-"分割到数组
NSArray *arr=[NSArray array];
content=[NSString stringWithString:content];
arr = [content componentsSeparatedByString:@"-"];
NSMutableArray *marr=[NSMutableArray arrayWithArray:arr];
[marr removeObject:@""];
return marr;
//正则去除网络标签
-(NSString *)getZZwithString:(NSString *)string{NSRegularExpression *regularExpretion=[NSRegularExpression regularExpressionWithPattern:@"<[^>]*>|\n"options:0error:nil];string=[regularExpretion stringByReplacingMatchesInString:string options:NSMatchingReportProgress range:NSMakeRange(0, string.length) withTemplate:@""];return string;
}