先講結論:如果可以,盡量不要用內建的 DOMDocument,因為它問題很多,也很難用。
案例:使用以下程式碼,修改內容後會得到亂碼 HTML
// Problem case: prefix the src of every <img> in an HTML fragment ($content).
// The fragment is parsed as-is, with no charset declaration in front of it;
// this is the version the article reports as producing garbled output.
// ($contentType is not defined at this point, so it must not be concatenated.)
$dom = new DOMDocument();
$dom->loadHTML($content);
$images = $dom->getElementsByTagName('img');
foreach ($images as $image) {
    $src = $image->getAttribute('src');
    $image->setAttribute('src', 'https://prefix.com' . $src);
}
// Keep the serialized markup instead of discarding it, so the result can be inspected.
$modified_content = $dom->saveHTML();
解法:在內容前面加上宣告 UTF-8 的 meta 標籤,並且把最後產出的 HTML 中自動補上的 DOCTYPE、html、head、meta、body 等標籤移除
// Workaround: put a UTF-8 charset declaration in front of the fragment before
// parsing it, then strip the wrapper markup from the serialized document.
$contentType = '<meta http-equiv="Content-Type" content="text/html; charset=utf-8">';

$dom = new DOMDocument();
$dom->loadHTML($contentType . $content);

// Wrapper strings to delete from the saveHTML() output. str_replace() processes
// an array of needles one after another, in the order listed here.
$wrapperTags = [
    '<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN" "http://www.w3.org/TR/REC-html40/loose.dtd">',
    '<html>',
    '<head>',
    '<meta http-equiv="Content-Type" content="text/html; charset=utf-8">',
    '</head>',
    '<body>',
    '</body></html>',
];

$modified_content = str_replace($wrapperTags, '', $dom->saveHTML());
echo $modified_content;