C++ 提取 HTML 中的文本
/**
 * Reads the contents of a file into a newly allocated, NUL-terminated buffer.
 *
 * @param file_name Path of the file to read.
 * @return A buffer allocated with new[]; the caller owns it and must release
 *         it with delete[]. If the file cannot be opened, or its size cannot
 *         be determined, the buffer holds an empty string (a single '\0').
 */
char* HtmlUtil::readFile(std::string& file_name){
    std::ifstream file(file_name);

    // Determine the file size; stay at 0 when the stream is unusable so that
    // we never allocate or index with the -1 that tellg() reports on failure.
    std::streamsize length = 0;
    if (file) {
        file.seekg(0, std::ios::end);
        const std::streamoff end_pos = file.tellg();
        file.seekg(0, std::ios::beg);
        if (file && end_pos > 0) {
            length = static_cast<std::streamsize>(end_pos);
        }
    }

    char* buffer = new char[length + 1];

    // The stream may deliver fewer characters than the size on disk (e.g.
    // newline translation in text mode, or a short read), so terminate the
    // string at the number of characters actually extracted.
    std::streamsize read_count = 0;
    if (length > 0) {
        file.read(buffer, length);
        read_count = file.gcount();
    }
    buffer[read_count] = '\0';
    return buffer;
}
/**
 * Parses the given HTML file and collects the text of every node matching
 * the selector ".source-cpp pre.de1, .source-text pre.de1".
 *
 * @param file_name Path of the HTML file to parse.
 * @return The text of each matched node, in the order reported by the
 *         selection; empty when nothing matches.
 */
std::vector<std::string> HtmlUtil::getCppSource(std::string& file_name){
    // readFile() hands back a new[]-allocated buffer that the caller must
    // free. This guard releases it on every exit path, including exceptions.
    struct BufferGuard {
        char* data;
        explicit BufferGuard(char* p) : data(p) {}
        ~BufferGuard() { delete[] data; }
        BufferGuard(const BufferGuard&) = delete;
        BufferGuard& operator=(const BufferGuard&) = delete;
    };

    // Declared before the document so that the document is destroyed first;
    // the buffer therefore outlives anything that might still refer to it.
    BufferGuard html(HtmlUtil::readFile(file_name));

    CDocument doc;
    doc.parse(html.data);
    CSelection c = doc.find(".source-cpp pre.de1, .source-text pre.de1");

    int size = c.nodeNum();
    std::vector<std::string> ret;
    if (size > 0) {
        ret.reserve(size);
    }
    for (int i = 0; i < size; i++) {
        ret.push_back(c.nodeAt(i).text());
    }
    return ret;
}