{"id":1259,"date":"2025-10-20T19:26:54","date_gmt":"2025-10-20T11:26:54","guid":{"rendered":"https:\/\/www.zhaozhao123.cn\/php\/my1js\/1259.html"},"modified":"2025-10-20T19:29:16","modified_gmt":"2025-10-20T11:29:16","slug":"masterminds-html5%e4%b8%8esymfony-domcrawler%e7%bb%93%e5%90%88%e4%bd%bf%e7%94%a8%e6%95%99%e7%a8%8b%e4%b8%8e%e5%ae%9e%e6%88%98","status":"publish","type":"my1js","link":"https:\/\/www.zhaozhao123.cn\/php\/my1js\/1259.html","title":{"rendered":"masterminds\/html5\u4e0eSymfony DomCrawler\u7ed3\u5408\u4f7f\u7528\u6559\u7a0b\u4e0e\u5b9e\u6218"},"content":{"rendered":"<p>\u4ee5\u4e0b\u662f\u7ed3\u5408\u4f7f\u7528 <code>masterminds\/html5<\/code>\uff08PHP \u7684 HTML5 \u89e3\u6790\u5e93\uff09\u4e0e Symfony DomCrawler \u7684\u6559\u7a0b\u548c\u5b9e\u6218\u6307\u5357\u3002\u901a\u8fc7\u4e8c\u8005\u7684\u7ed3\u5408\uff0c\u53ef\u4ee5\u66f4\u9ad8\u6548\u5730\u89e3\u6790\u73b0\u4ee3 HTML5 \u5185\u5bb9\uff0c\u5e76\u5229\u7528 DomCrawler \u5f3a\u5927\u7684\u6570\u636e\u63d0\u53d6\u80fd\u529b\u3002<\/p><hr class=\"wp-block-separator has-alpha-channel-opacity\"><h3 class=\"wp-block-heading\">\u4e00\u3001\u80cc\u666f\u4ecb\u7ecd<\/h3><ol class=\"wp-block-list\">\n<li><strong>masterminds\/html5<\/strong><br>\u4e00\u4e2a\u7b26\u5408 HTML5 \u89c4\u8303\u7684 PHP \u89e3\u6790\u5e93\uff0c\u652f\u6301\u89e3\u6790\u590d\u6742\u7684 HTML5 \u6587\u6863\uff08\u5982\u672a\u95ed\u5408\u6807\u7b7e\u3001\u5d4c\u5957\u9519\u8bef\u7b49\uff09\uff0c\u9002\u7528\u4e8e\u5904\u7406\u73b0\u4ee3\u7f51\u9875\u3002<\/li>\n\n\n\n<li><strong>Symfony DomCrawler<\/strong><br>Symfony \u63d0\u4f9b\u7684 HTML\/XML \u89e3\u6790\u5de5\u5177\uff0c\u652f\u6301 XPath \u548c CSS \u9009\u62e9\u5668\uff0c\u9002\u5408\u4ece\u6587\u6863\u4e2d\u63d0\u53d6\u7ed3\u6784\u5316\u6570\u636e\u3002<\/li>\n<\/ol><p>\u4e8c\u8005\u7684\u7ed3\u5408\u80fd\u89e3\u51b3\u539f\u751f PHP DOM \u89e3\u6790\u5668\u5bf9 HTML5 \u652f\u6301\u4e0d\u8db3\u7684\u95ee\u9898\uff0c\u540c\u65f6\u4fdd\u7559 DomCrawler 
\u7684\u4fbf\u6377 API\u3002<\/p><hr class=\"wp-block-separator has-alpha-channel-opacity\"><h3 class=\"wp-block-heading\">\u4e8c\u3001\u73af\u5883\u51c6\u5907<\/h3><h4 class=\"wp-block-heading\">1. \u5b89\u88c5\u4f9d\u8d56<\/h4><p>\u901a\u8fc7 Composer \u5b89\u88c5\u4e24\u4e2a\u5e93\uff1a<\/p><pre class=\"wp-block-code\"><code>composer require masterminds\/html5 symfony\/dom-crawler symfony\/css-selector<\/code><\/pre><h4 class=\"wp-block-heading\">2. \u5f15\u5165\u547d\u540d\u7a7a\u95f4<\/h4><p>use Masterminds\\HTML5;<br>use Symfony\\Component\\DomCrawler\\Crawler;<\/p><hr class=\"wp-block-separator has-alpha-channel-opacity\"><h3 class=\"wp-block-heading\">\u4e09\u3001\u57fa\u7840\u7528\u6cd5<\/h3><h4 class=\"wp-block-heading\">1. \u89e3\u6790 HTML5 \u5e76\u521b\u5efa Crawler \u5bf9\u8c61<\/h4><pre class=\"wp-block-code\"><code>$html5 = new HTML5();\n$htmlContent = &lt;&lt;&lt;'HTML'\n&lt;!DOCTYPE html&gt;\n&lt;html&gt;\n&lt;head&gt;&lt;title&gt;Test&lt;\/title&gt;&lt;\/head&gt;\n&lt;body&gt;\n  &lt;article class=\"post\"&gt;\n    &lt;h1&gt;Hello HTML5!&lt;\/h1&gt;\n    &lt;div data-content=\"main\"&gt;Content with &lt;span&gt;nested&lt;\/span&gt; elements.&lt;\/div&gt;\n  &lt;\/article&gt;\n&lt;\/body&gt;\n&lt;\/html&gt;\nHTML;\n\n\/\/ \u4f7f\u7528 html5 \u89e3\u6790\u5668\u751f\u6210 DOMDocument\n$dom = $html5-&gt;loadHTML($htmlContent);\n\n\/\/ \u5c06 DOMDocument \u4f20\u9012\u7ed9 DomCrawler\n$crawler = new Crawler($dom);<\/code><\/pre><h4 class=\"wp-block-heading\">2. 
\u4f7f\u7528 CSS \u9009\u62e9\u5668\u63d0\u53d6\u6570\u636e<\/h4><pre class=\"wp-block-code\"><code>\/\/ \u63d0\u53d6\u6807\u9898\u6587\u672c\n$title = $crawler-&gt;filter('title')-&gt;text();\necho $title; \/\/ \u8f93\u51fa: Test\n\n\/\/ \u63d0\u53d6\u6587\u7ae0\u5185\u5bb9\n$crawler-&gt;filter('.post')-&gt;each(function (Crawler $node) {\n    $heading = $node-&gt;filter('h1')-&gt;text();\n    $content = $node-&gt;filter('[data-content=\"main\"]')-&gt;text();\n\n    echo \"Heading: $heading\\nContent: $content\\n\";\n});<\/code><\/pre><hr class=\"wp-block-separator has-alpha-channel-opacity\"><h3 class=\"wp-block-heading\">\u56db\u3001\u5b9e\u6218\u573a\u666f\uff1a\u722c\u53d6\u52a8\u6001 HTML5 \u5185\u5bb9<\/h3><h4 class=\"wp-block-heading\">1. \u89e3\u6790\u8fdc\u7a0b\u7f51\u9875<\/h4><pre class=\"wp-block-code\"><code>$url = '<a href=\"https:\/\/www.zhaozhao123.cn\/skin\/go?url=aHR0cHM6Ly9leGFtcGxlLmNvbQ==\" rel=\"noopener noreferrer nofollow\">https:\/\/example.com<\/a>';\n$htmlContent = file_get_contents($url);\n\n\/\/ \u4f7f\u7528 html5 \u89e3\u6790\u53ef\u80fd\u5305\u542b\u975e\u6807\u51c6\u6807\u7b7e\u7684\u9875\u9762\n$html5 = new HTML5();\n$dom = $html5-&gt;loadHTML($htmlContent);\n$crawler = new Crawler($dom);\n\n\/\/ \u63d0\u53d6\u6240\u6709\u94fe\u63a5\n$links = $crawler-&gt;filter('a')-&gt;extract(['href']);\nprint_r($links);<\/code><\/pre><h4 class=\"wp-block-heading\">2. 
\u5904\u7406 HTML5 \u7279\u6b8a\u6807\u7b7e<\/h4><p>\u82e5\u9875\u9762\u5305\u542b <code>&lt;template&gt;<\/code>\u3001<code>&lt;svg&gt;<\/code> \u6216\u81ea\u5b9a\u4e49\u6807\u7b7e\uff1a<\/p><pre class=\"wp-block-code\"><code>$html = &lt;&lt;&lt;'HTML'\n&lt;div&gt;\n  &lt;template id=\"tpl\"&gt;&lt;p&gt;HTML5 Template&lt;\/p&gt;&lt;\/template&gt;\n  &lt;svg&gt;&lt;circle cx=\"50\" cy=\"50\" r=\"40\"\/&gt;&lt;\/svg&gt;\n&lt;\/div&gt;\nHTML;\n$html5 = new HTML5();\n$dom = $html5-&gt;loadHTML($html);\n$crawler = new Crawler($dom);\n\n\/\/ \u63d0\u53d6 SVG \u4e2d\u7684 circle \u5c5e\u6027\n$circle = $crawler-&gt;filter('svg circle')-&gt;attr('cx');\necho \"Circle CX: $circle\"; \/\/ \u8f93\u51fa: 50<\/code><\/pre><hr class=\"wp-block-separator has-alpha-channel-opacity\"><h3 class=\"wp-block-heading\">\u4e94\u3001\u9ad8\u7ea7\u6280\u5de7<\/h3><h4 class=\"wp-block-heading\">1. \u9519\u8bef\u5904\u7406\u4e0e\u5bb9\u9519<\/h4><pre class=\"wp-block-code\"><code>try {\n    $dom = $html5-&gt;loadHTML('&lt;div&gt;&lt;p&gt;Broken HTML&lt;\/div&gt;');\n} catch (Exception $e) {\n    \/\/ \u5904\u7406\u89e3\u6790\u9519\u8bef\n    echo \"\u89e3\u6790\u5931\u8d25: \" . $e-&gt;getMessage();\n}\n\n\/\/ \u9759\u9ed8\u5ffd\u7565\u9519\u8bef\uff08\u9ed8\u8ba4\u884c\u4e3a\uff09\n$html5-&gt;setOption('ignore_parse_errors', true);\n$dom = $html5-&gt;loadHTML('&lt;div&gt;&lt;p&gt;Broken HTML');<\/code><\/pre><h4 class=\"wp-block-heading\">2. 
\u7ed3\u5408 XPath \u590d\u6742\u67e5\u8be2<\/h4><pre class=\"wp-block-code\"><code>\/\/ \u4f7f\u7528 XPath \u67e5\u627e\u5177\u6709 data-* \u5c5e\u6027\u7684\u5143\u7d20\n$result = $crawler-&gt;filterXPath('\/\/*[@data-content]')-&gt;each(function (Crawler $node) {\n    return $node-&gt;attr('data-content');\n});\nprint_r($result); \/\/ \u8f93\u51fa: ['main']<\/code><\/pre><hr class=\"wp-block-separator has-alpha-channel-opacity\"><h3 class=\"wp-block-heading\">\u516d\u3001\u6027\u80fd\u4f18\u5316<\/h3><ul class=\"wp-block-list\">\n<li><strong>\u7f13\u5b58 DOM \u89e3\u6790\u7ed3\u679c<\/strong>\uff1a\u5bf9\u9759\u6001\u5185\u5bb9\u89e3\u6790\u4e00\u6b21\u540e\u91cd\u590d\u4f7f\u7528\u3002<\/li>\n\n\n\n<li><strong>\u9650\u5236\u9009\u62e9\u5668\u8303\u56f4<\/strong>\uff1a\u901a\u8fc7 <code>filter()<\/code> \u9650\u5b9a\u4e0a\u4e0b\u6587\uff0c\u51cf\u5c11\u641c\u7d22\u8303\u56f4\u3002<\/li>\n\n\n\n<li><strong>\u907f\u514d\u91cd\u590d\u89e3\u6790<\/strong>\uff1a\u76f4\u63a5\u590d\u7528 <code>Crawler<\/code> \u5bf9\u8c61\u3002<\/li>\n<\/ul><hr class=\"wp-block-separator has-alpha-channel-opacity\"><h3 class=\"wp-block-heading\">\u4e03\u3001\u603b\u7ed3<\/h3><p>\u901a\u8fc7\u7ed3\u5408 <code>masterminds\/html5<\/code> \u7684\u5f3a\u5bb9\u9519\u6027\u548c <code>Symfony DomCrawler<\/code> \u7684\u4fbf\u6377\u9009\u62e9\u5668\uff0c\u53ef\u4ee5\u9ad8\u6548\u5904\u7406\u73b0\u4ee3 HTML5 \u5185\u5bb9\u3002\u9002\u7528\u4e8e\u4ee5\u4e0b\u573a\u666f\uff1a<\/p><ul class=\"wp-block-list\">\n<li>\u722c\u53d6\u52a8\u6001\u751f\u6210\u7684\u7f51\u9875<\/li>\n\n\n\n<li>\u89e3\u6790\u5bcc\u6587\u672c\u7f16\u8f91\u5668\u8f93\u51fa\u7684\u975e\u6807\u51c6 HTML<\/li>\n\n\n\n<li>\u5904\u7406\u5305\u542b Web Components\/SVG \u7684\u9875\u9762<\/li>\n<\/ul><p>\u5b8c\u6574\u4ee3\u7801\u793a\u4f8b\u53ef\u5728 <a href=\"https:\/\/www.zhaozhao123.cn\/skin\/go?url=aHR0cHM6Ly9naXN0LmdpdGh1Yi5jb20vZXhhbXBsZQ==\" rel=\"noopener noreferrer nofollow\">GitHub Gist<\/a> 
\u83b7\u53d6\u3002<\/p>","protected":false},"excerpt":{"rendered":"<p>\u4ee5\u4e0b\u662f\u7ed3\u5408\u4f7f\u7528 masterminds\/html5\uff08PHP \u7684 HTML5 \u89e3\u6790\u5e93\uff09\u4e0e Symfony DomCrawler \u7684\u6559\u7a0b\u548c\u5b9e\u6218\u6307\u5357\u3002\u901a\u8fc7\u4e8c\u8005\u7684\u7ed3\u5408\uff0c\u53ef\u4ee5\u66f4\u9ad8\u6548\u5730\u89e3\u6790\u73b0\u4ee3 HTML5 \u5185\u5bb9\uff0c\u5e76\u5229\u7528 DomCrawler \u5f3a\u5927\u7684\u6570\u636e\u63d0\u53d6\u80fd\u529b\u3002 \u4e00\u3001\u80cc\u666f\u4ecb\u7ecd \u4e8c\u8005\u7684\u7ed3\u5408\u80fd\u89e3\u51b3\u539f\u751f PHP DOM \u89e3\u6790\u5668\u5bf9 HTM..<\/p>\n","protected":false},"author":1,"featured_media":0,"menu_order":0,"template":"","meta":{"_acf_changed":false},"tags":[],"my1js2nav":[45],"tuisongtax":[],"class_list":["post-1259","my1js","type-my1js","status-publish","hentry","my1js2nav-symfony"],"acf":{"qian_art_seotitle":"","qian_art_seotitle_source":{"label":"SEO\u6807\u9898","type":"text","formatted_value":""},"qian_art_seokws":"","qian_art_seokws_source":{"label":"SEO\u5173\u952e\u8bcd","type":"text","formatted_value":""},"qian_art_stzhong":"","qian_art_stzhong_source":{"label":"\u4e2d | 
\u77ed\u6807\u9898","type":"text","formatted_value":""}},"_links":{"self":[{"href":"https:\/\/www.zhaozhao123.cn\/php\/wp-json\/wp\/v2\/my1js\/1259","targetHints":{"allow":["GET"]}}],"collection":[{"href":"https:\/\/www.zhaozhao123.cn\/php\/wp-json\/wp\/v2\/my1js"}],"about":[{"href":"https:\/\/www.zhaozhao123.cn\/php\/wp-json\/wp\/v2\/types\/my1js"}],"author":[{"embeddable":true,"href":"https:\/\/www.zhaozhao123.cn\/php\/wp-json\/wp\/v2\/users\/1"}],"wp:attachment":[{"href":"https:\/\/www.zhaozhao123.cn\/php\/wp-json\/wp\/v2\/media?parent=1259"}],"wp:term":[{"taxonomy":"post_tag","embeddable":true,"href":"https:\/\/www.zhaozhao123.cn\/php\/wp-json\/wp\/v2\/tags?post=1259"},{"taxonomy":"my1js2nav","embeddable":true,"href":"https:\/\/www.zhaozhao123.cn\/php\/wp-json\/wp\/v2\/my1js2nav?post=1259"},{"taxonomy":"tuisongtax","embeddable":true,"href":"https:\/\/www.zhaozhao123.cn\/php\/wp-json\/wp\/v2\/tuisongtax?post=1259"}],"curies":[{"name":"wp","href":"https:\/\/api.w.org\/{rel}","templated":true}]}}