Skip to content

Commit

Permalink
Round 1 of rector
Browse files Browse the repository at this point in the history
  • Loading branch information
spekulatius committed Aug 29, 2023
1 parent 2c36173 commit 642744b
Show file tree
Hide file tree
Showing 5 changed files with 28 additions and 41 deletions.
6 changes: 2 additions & 4 deletions src/PHPScraper.php
Original file line number Diff line number Diff line change
Expand Up @@ -25,10 +25,8 @@ class PHPScraper

/**
* Holds the Core class. It handles the actual scraping.
*
* @var \Spekulatius\PHPScraper\Core
*/
protected $core = null;
protected Core $core;

/**
* @param PHPScraperConfig $config
Expand Down Expand Up @@ -92,7 +90,7 @@ public function setConfig(array $config = []): self
];

// Add the defaults in
$this->config = array_merge($defaults, $config);
$this->config = [...$defaults, ...$config];

// Symfony HttpClient
$httpClient = SymfonyHttpClient::create([
Expand Down
6 changes: 3 additions & 3 deletions src/UsesBrowserKit.php
Original file line number Diff line number Diff line change
Expand Up @@ -13,21 +13,21 @@ trait UsesBrowserKit
*
* @var \Symfony\Component\BrowserKit\HttpBrowser
*/
protected $client = null;
protected $client;

/**
* Holds the HttpClient
*
* @var \Symfony\Contracts\HttpClient\HttpClientInterface
*/
protected $httpClient = null;
protected $httpClient;

/**
* Holds the current page (a Crawler object)
*
* @var \Symfony\Component\DomCrawler\Crawler
*/
protected $currentPage = null;
protected $currentPage;

/**
* Overwrites the client
Expand Down
34 changes: 15 additions & 19 deletions src/UsesContent.php
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ public function baseHref(): ?string
/**
* Get the collected header values as an array
*
* @return array<string, array|string|null>
* @return array{charset: mixed, contentType: mixed, viewport: mixed, canonical: mixed, csrfToken: mixed}
*/
public function headers(): array
{
Expand Down Expand Up @@ -102,6 +102,8 @@ public function description(): ?string

/**
* Get the collected meta tags as an array
*
* @return array{author: mixed, image: mixed, keywords: mixed, description: mixed}
*/
public function metaTags(): array
{
Expand Down Expand Up @@ -221,19 +223,15 @@ public function lists(): array
**/
public function orderedLists(): array
{
return array_values(array_filter($this->lists(), function ($list) {
return $list['type'] === 'ol';
}));
return array_values(array_filter($this->lists(), fn ($list): bool => $list['type'] === 'ol'));
}

/**
* @return array<string>
**/
public function unorderedLists(): array
{
return array_values(array_filter($this->lists(), function ($list) {
return $list['type'] === 'ul';
}));
return array_values(array_filter($this->lists(), fn ($list): bool => $list['type'] === 'ul'));
}

/**
Expand All @@ -254,9 +252,7 @@ public function cleanParagraphs(): array
{
return array_values(array_filter(
$this->paragraphs(),
function ($paragraph) {
return $paragraph !== '';
}
fn ($paragraph): bool => $paragraph !== ''
));
}

Expand Down Expand Up @@ -287,7 +283,7 @@ public function outlineWithParagraphs(): array

foreach ($result as $index => $array) {
$result[$index] = array_combine(['tag', 'content'], (array) $array);
$result[$index]['content'] = trim($result[$index]['content']);
$result[$index]['content'] = trim((string) $result[$index]['content']);
}

return $result;
Expand Down Expand Up @@ -451,7 +447,7 @@ public function internalLinks(): array
// Filter the array
return array_values(array_filter(
$this->links(),
function ($link) use (&$currentRootDomain) {
function ($link) use (&$currentRootDomain): bool {
$linkRootDomain = Uri::createFromString($link)->getHost();

return $currentRootDomain === $linkRootDomain;
Expand Down Expand Up @@ -502,18 +498,18 @@ public function linksWithDetails(): array
// Prepare the result set.
$entry = [
'url' => $uri,
'protocol' => \strpos($uri, ':') !== false ? explode(':', $uri)[0] : null,
'protocol' => str_contains($uri, ':') ? explode(':', $uri)[0] : null,
'text' => trim($link->nodeValue ?? ''),
'title' => $link->getAttribute('title') === '' ? null : $link->getAttribute('title'),
'target' => $link->getAttribute('target') === '' ? null : $link->getAttribute('target'),
'rel' => ($rel === '') ? null : strtolower($rel),
'image' => $image,
'isNofollow' => ($rel === '') ? false : (\strpos($rel, 'nofollow') !== false),
'isUGC' => ($rel === '') ? false : (\strpos($rel, 'ugc') !== false),
'isSponsored' => ($rel === '') ? false : (\strpos($rel, 'sponsored') !== false),
'isMe' => ($rel === '') ? false : (\strpos($rel, 'me') !== false),
'isNoopener' => ($rel === '') ? false : (\strpos($rel, 'noopener') !== false),
'isNoreferrer' => ($rel === '') ? false : (\strpos($rel, 'noreferrer') !== false),
'isNofollow' => ($rel === '') ? false : str_contains($rel, 'nofollow'),
'isUGC' => ($rel === '') ? false : str_contains($rel, 'ugc'),
'isSponsored' => ($rel === '') ? false : str_contains($rel, 'sponsored'),
'isMe' => ($rel === '') ? false : str_contains($rel, 'me'),
'isNoopener' => ($rel === '') ? false : str_contains($rel, 'noopener'),
'isNoreferrer' => ($rel === '') ? false : str_contains($rel, 'noreferrer'),
];

$result[] = $entry;
Expand Down
16 changes: 8 additions & 8 deletions src/UsesFileParsers.php
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,7 @@ public function csvDecodeWithHeaderRaw(
// Combine the rows with the header entry.
array_walk(
$csv,
function (&$row, $key, $header) {
function (&$row, $key, $header): void {
$row = array_combine($header, $row);
},
$header
Expand Down Expand Up @@ -133,18 +133,16 @@ public function csvDecodeWithHeader(

/**
* Helper method to cast types
*
* @return int|float|string
*/
public function castType(string $entry)
public function castType(string $entry): int|float|string
{
// Looks like an int?
if ($entry == (string) (int) $entry) {
if ($entry == (int) $entry) {
return (int) $entry;
}

// Looks like a float?
if ($entry == (string) (float) $entry) {
if ($entry == (float) $entry) {
return (float) $entry;
}

Expand Down Expand Up @@ -301,7 +299,9 @@ public function parseJson(?string $jsonStringOrUrl = null): array
// Fall back to the current URL, if needed and possible (`go` was used before).
$jsonStringOrUrl ?? $this->currentUrl()
),
true
true,
512,
JSON_THROW_ON_ERROR
);
} catch (\Exception $e) {
throw new \Exception('Failed to parse JSON: ' . $e->getMessage());
Expand Down Expand Up @@ -357,6 +357,6 @@ protected function xmlDecode(string $xmlString): array
$xml = simplexml_load_string(trim($xmlString), 'SimpleXMLElement', LIBXML_NOCDATA);

// Convert XML to JSON and then to an associative array
return (array) json_decode((string) json_encode($xml), true);
return (array) json_decode(json_encode($xml, JSON_THROW_ON_ERROR), true, 512, JSON_THROW_ON_ERROR);
}
}
7 changes: 0 additions & 7 deletions src/UsesXPathFilters.php
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,6 @@ public function filter(string $query): Crawler

/**
* Filters the current page by an XPath query and returns the first match, or null.
*
* @return ?Crawler
*/
public function filterFirst(string $query): ?Crawler
{
Expand All @@ -28,8 +26,6 @@ public function filterFirst(string $query): ?Crawler

/**
* Filters the current page by an XPath query and returns the first match's content, or null.
*
* @return ?string
*/
public function filterFirstText(string $query): ?string
{
Expand Down Expand Up @@ -65,7 +61,6 @@ public function filterExtractAttributes(string $query, array $attributes): array
* Filters the current page by an XPath query and returns the selected attributes of the first match.
*
* @param array<string> $attributes
* @return ?string
*/
public function filterFirstExtractAttribute(string $query, array $attributes): ?string
{
Expand All @@ -76,8 +71,6 @@ public function filterFirstExtractAttribute(string $query, array $attributes): ?

/**
* Returns the content attribute for the first result of the query, or null.
*
* @return ?string
*/
public function filterFirstContent(string $query): ?string
{
Expand Down

0 comments on commit 642744b

Please sign in to comment.