Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 1 addition & 6 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ jobs:
name: Coding Standard
uses: brick/coding-standard/.github/workflows/coding-standard.yml@v2
with:
php-version: 8.4
working-directory: "tools/ecs"

phpunit:
Expand All @@ -43,15 +44,9 @@ jobs:
fail-fast: false
matrix:
php-version:
- "8.1"
- "8.2"
- "8.3"
- "8.4"
deps:
- "highest"
include:
- php-version: "8.1"
deps: "lowest"

steps:
- name: Checkout
Expand Down
12 changes: 6 additions & 6 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ composer require brick/structured-data

### Requirements

This library requires PHP 8.1 or later. It makes use of the following extensions:
This library requires PHP 8.4 or later. It makes use of the following extensions:

- [dom](https://www.php.net/manual/en/book.dom.php)
- [json](https://www.php.net/manual/en/book.json.php)
Expand Down Expand Up @@ -55,13 +55,13 @@ interface Brick\StructuredData\Reader
/**
* Reads the items contained in the given document.
*
* @param DOMDocument $document The DOM document to read.
* @param string $url The URL the document was retrieved from. This will be used only to resolve relative
* URLs in property values. No attempt will be performed to connect to this URL.
* @param HTMLDocument $document The DOM document to read.
* @param string $url The URL the document was retrieved from. This will be used only to resolve relative
* URLs in property values. No attempt will be performed to connect to this URL.
*
* @return Item[] The top-level items.
*/
public function read(DOMDocument $document, string $url) : array;
public function read(HTMLDocument $document, string $url) : array;
}
```

Expand Down Expand Up @@ -93,7 +93,7 @@ use Brick\StructuredData\Item;
$microdataReader = new MicrodataReader();

// Wrap into HTMLReader to be able to read HTML strings or files directly,
// i.e. without manually converting them to DOMDocument instances first
// i.e. without manually converting them to HTMLDocument instances first
$htmlReader = new HTMLReader($microdataReader);

// Replace this URL with that of a website you know is using Microdata
Expand Down
2 changes: 1 addition & 1 deletion composer.json
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
],
"license": "MIT",
"require": {
"php": "^8.1",
"php": "^8.4",
"ext-dom": "*",
"ext-json": "*",
"ext-libxml": "*",
Expand Down
171 changes: 83 additions & 88 deletions psalm-baseline.xml
Original file line number Diff line number Diff line change
Expand Up @@ -14,13 +14,14 @@
</MissingClosureParamType>
<MixedArgument>
<code><![CDATA[$name]]></code>
<code><![CDATA[$node->textContent]]></code>
</MixedArgument>
<MixedArrayAccess>
<code><![CDATA[$name[0]]]></code>
</MixedArrayAccess>
<PossiblyInvalidArgument>
<code><![CDATA[fn (DOMNode $node) => $this->readJson($node->textContent, $url)]]></code>
</PossiblyInvalidArgument>
<NamedArgumentNotAllowed>
<code><![CDATA[$items]]></code>
</NamedArgumentNotAllowed>
<RawObjectIteration>
<code><![CDATA[$item]]></code>
</RawObjectIteration>
Expand All @@ -29,106 +30,100 @@
</RedundantConditionGivenDocblockType>
</file>
<file src="src/Reader/MicrodataReader.php">
<InvalidArgument>
<code><![CDATA[function (DOMNode $itemprop) use ($node, $xpath) {
for (; ;) {
$itemprop = $itemprop->parentNode;

if ($itemprop->isSameNode($node)) {
return true;
}

if ($itemprop->attributes->getNamedItem('itemscope')) {
return false;
}
}
}]]></code>
</InvalidArgument>
<PossiblyInvalidArgument>
<code><![CDATA[fn (DOMNode $node) => $this->nodeToItem($node, $xpath, $url)]]></code>
<code><![CDATA[function (DOMNode $itemprop) use ($node, $xpath) {
for (; ;) {
$itemprop = $itemprop->parentNode;

if ($itemprop->isSameNode($node)) {
return true;
}

if ($itemprop->attributes->getNamedItem('itemscope')) {
return false;
}
}
}]]></code>
</PossiblyInvalidArgument>
<PossiblyNullArgument>
<MixedArgument>
<code><![CDATA[$attr->textContent]]></code>
<code><![CDATA[$attr->textContent]]></code>
<code><![CDATA[$attr->textContent]]></code>
<code><![CDATA[$itemid->textContent]]></code>
<code><![CDATA[$itemtype->textContent]]></code>
<code><![CDATA[$names]]></code>
<code><![CDATA[preg_replace('/\s+/', ' ', $node->textContent)]]></code>
</PossiblyNullArgument>
<PossiblyNullPropertyFetch>
<code><![CDATA[$itemprop->attributes->getNamedItem('itemprop')->textContent]]></code>
</PossiblyNullPropertyFetch>
<PossiblyNullReference>
<code><![CDATA[$node->textContent]]></code>
</MixedArgument>
<MixedMethodCall>
<code><![CDATA[getNamedItem]]></code>
<code><![CDATA[getNamedItem]]></code>
<code><![CDATA[getNamedItem]]></code>
<code><![CDATA[getNamedItem]]></code>
<code><![CDATA[getNamedItem]]></code>
<code><![CDATA[getNamedItem]]></code>
<code><![CDATA[getNamedItem]]></code>
<code><![CDATA[getNamedItem]]></code>
<code><![CDATA[getNamedItem]]></code>
<code><![CDATA[getNamedItem]]></code>
<code><![CDATA[getNamedItem]]></code>
<code><![CDATA[isSameNode]]></code>
</PossiblyNullReference>
</file>
<file src="src/Reader/RdfaLiteReader.php">
<InvalidArgument>
<code><![CDATA[function (DOMNode $itemprop) use ($node, $xpath) {
for (; ;) {
$itemprop = $itemprop->parentNode;

if ($itemprop->isSameNode($node)) {
return true;
}

if ($itemprop->attributes->getNamedItem('typeof')) {
return false;
}
}

// Unreachable, but makes static analysis happy
return false;
}]]></code>
</InvalidArgument>
</MixedMethodCall>
<MixedPropertyFetch>
<code><![CDATA[$attr->textContent]]></code>
<code><![CDATA[$attr->textContent]]></code>
<code><![CDATA[$attr->textContent]]></code>
<code><![CDATA[$attr->textContent]]></code>
<code><![CDATA[$attr->textContent]]></code>
<code><![CDATA[$attr->textContent]]></code>
<code><![CDATA[$itemid->textContent]]></code>
<code><![CDATA[$itemprop->attributes]]></code>
<code><![CDATA[$itemprop->attributes->getNamedItem('itemprop')->textContent]]></code>
<code><![CDATA[$itemprop->parentNode]]></code>
<code><![CDATA[$itemtype->textContent]]></code>
</MixedPropertyFetch>
<MixedReturnStatement>
<code><![CDATA[$attr->textContent]]></code>
<code><![CDATA[$attr->textContent]]></code>
<code><![CDATA[$attr->textContent]]></code>
</MixedReturnStatement>
<PossiblyInvalidArgument>
<code><![CDATA[fn (DOMNode $node) => $this->nodeToItem($node, $xpath, $url, self::PREDEFINED_PREFIXES, null)]]></code>
<code><![CDATA[function (DOMNode $itemprop) use ($node, $xpath) {
for (; ;) {
$itemprop = $itemprop->parentNode;

if ($itemprop->isSameNode($node)) {
return true;
}

if ($itemprop->attributes->getNamedItem('typeof')) {
return false;
}
}

// Unreachable, but makes static analysis happy
return false;
}]]></code>
<code><![CDATA[preg_replace('/\s+/', ' ', $node->textContent)]]></code>
</PossiblyInvalidArgument>
<PossiblyNullArgument>
<UndefinedPropertyFetch>
<code><![CDATA[$itemprop->attributes]]></code>
<code><![CDATA[$node->attributes]]></code>
<code><![CDATA[$node->attributes]]></code>
</UndefinedPropertyFetch>
</file>
<file src="src/Reader/RdfaLiteReader.php">
<MixedArgument>
<code><![CDATA[$attr->textContent]]></code>
<code><![CDATA[$attr->textContent]]></code>
<code><![CDATA[$names]]></code>
<code><![CDATA[$node->textContent]]></code>
<code><![CDATA[$resource->textContent]]></code>
<code><![CDATA[$typeof->textContent]]></code>
<code><![CDATA[preg_replace('/\s+/', ' ', $node->textContent)]]></code>
</PossiblyNullArgument>
<PossiblyNullPropertyFetch>
<code><![CDATA[$property->attributes->getNamedItem('property')->textContent]]></code>
<code><![CDATA[$typeof->textContent]]></code>
</PossiblyNullPropertyFetch>
<PossiblyNullReference>
<code><![CDATA[$vocab->textContent]]></code>
</MixedArgument>
<MixedMethodCall>
<code><![CDATA[getNamedItem]]></code>
<code><![CDATA[getNamedItem]]></code>
<code><![CDATA[getNamedItem]]></code>
<code><![CDATA[getNamedItem]]></code>
<code><![CDATA[getNamedItem]]></code>
<code><![CDATA[getNamedItem]]></code>
<code><![CDATA[getNamedItem]]></code>
<code><![CDATA[getNamedItem]]></code>
<code><![CDATA[getNamedItem]]></code>
<code><![CDATA[isSameNode]]></code>
</PossiblyNullReference>
</MixedMethodCall>
<MixedPropertyFetch>
<code><![CDATA[$attr->textContent]]></code>
<code><![CDATA[$attr->textContent]]></code>
<code><![CDATA[$attr->textContent]]></code>
<code><![CDATA[$itemprop->attributes]]></code>
<code><![CDATA[$itemprop->parentNode]]></code>
<code><![CDATA[$property->attributes->getNamedItem('property')->textContent]]></code>
<code><![CDATA[$resource->textContent]]></code>
<code><![CDATA[$typeof->textContent]]></code>
<code><![CDATA[$vocab->textContent]]></code>
</MixedPropertyFetch>
<MixedReturnStatement>
<code><![CDATA[$attr->textContent]]></code>
</MixedReturnStatement>
<PossiblyInvalidArgument>
<code><![CDATA[preg_replace('/\s+/', ' ', $node->textContent)]]></code>
</PossiblyInvalidArgument>
<UndefinedPropertyFetch>
<code><![CDATA[$node->attributes]]></code>
<code><![CDATA[$node->attributes]]></code>
<code><![CDATA[$node->attributes]]></code>
<code><![CDATA[$property->attributes]]></code>
</UndefinedPropertyFetch>
</file>
</files>
22 changes: 8 additions & 14 deletions src/DOMBuilder.php
Original file line number Diff line number Diff line change
Expand Up @@ -4,32 +4,26 @@

namespace Brick\StructuredData;

use DOMDocument;
use Dom\HTMLDocument;

use const Dom\HTML_NO_DEFAULT_NS;
use const LIBXML_NOERROR;
use const LIBXML_NOWARNING;

final class DOMBuilder
{
/**
* Builds a DOMDocument from an HTML string.
* Builds an HTMLDocument from an HTML string.
*/
public static function fromHTML(string $html): DOMDocument
public static function fromHTML(string $html): HTMLDocument
{
$document = new DOMDocument();
$document->loadHTML($html, LIBXML_NOWARNING | LIBXML_NOERROR);

return $document;
return HTMLDocument::createFromString($html, LIBXML_NOERROR | HTML_NO_DEFAULT_NS);
}

/**
* Builds a DOMDocument from an HTML file.
* Builds an HTMLDocument from an HTML file.
*/
public static function fromHTMLFile(string $file): DOMDocument
public static function fromHTMLFile(string $file): HTMLDocument
{
$document = new DOMDocument();
$document->loadHTMLFile($file, LIBXML_NOWARNING | LIBXML_NOERROR);

return $document;
return HTMLDocument::createFromFile($file, LIBXML_NOERROR | HTML_NO_DEFAULT_NS);
}
}
10 changes: 5 additions & 5 deletions src/Reader.php
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

namespace Brick\StructuredData;

use DOMDocument;
use Dom\HTMLDocument;

/**
* Common interface for readers of each format: Microdata, RDFa Lite, JSON-LD.
Expand All @@ -14,11 +14,11 @@ interface Reader
/**
* Reads the items contained in the given document.
*
* @param DOMDocument $document The DOM document to read.
* @param string $url The URL the document was retrieved from. This will be used only to resolve relative
* URLs in property values. The implementation must not attempt to connect to this URL.
* @param HTMLDocument $document The HTMLDocument to read.
* @param string $url The URL the document was retrieved from. This will be used only to resolve relative
* URLs in property values. The implementation must not attempt to connect to this URL.
*
* @return Item[] The top-level items.
*/
public function read(DOMDocument $document, string $url): array;
public function read(HTMLDocument $document, string $url): array;
}
12 changes: 6 additions & 6 deletions src/Reader/JsonLdReader.php
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,9 @@

use Brick\StructuredData\Item;
use Brick\StructuredData\Reader;
use DOMDocument;
use DOMNode;
use DOMXPath;
use Dom\HTMLDocument;
use Dom\Node;
use Dom\XPath;
use Override;
use Sabre\Uri\InvalidUriException;
use stdClass;
Expand Down Expand Up @@ -66,9 +66,9 @@ public function __construct(array $iriProperties = [])
}

#[Override]
public function read(DOMDocument $document, string $url): array
public function read(HTMLDocument $document, string $url): array
{
$xpath = new DOMXPath($document);
$xpath = new XPath($document);

$nodes = $xpath->query('//script[@type="application/ld+json"]');
$nodes = iterator_to_array($nodes);
Expand All @@ -78,7 +78,7 @@ public function read(DOMDocument $document, string $url): array
}

$items = array_map(
fn (DOMNode $node) => $this->readJson($node->textContent, $url),
fn (Node $node) => $this->readJson($node->textContent, $url),
$nodes,
);

Expand Down
Loading