Server : LiteSpeed System : Linux server335.web-hosting.com 4.18.0-553.62.1.lve.el8.x86_64 #1 SMP Mon Jul 21 17:50:35 UTC 2025 x86_64 User : cardxfeb ( 2452) PHP Version : 8.1.34 Disable Function : NONE Directory : /home/cardxfeb/www/app/Services/ |
<?php
namespace App\Services;
use Illuminate\Support\Facades\Http;
use Illuminate\Support\Facades\Log;
/**
 * Scrapes public metadata (Open Graph / meta tags / <title>) from social
 * profile URLs and corporate websites, and extracts brand-colour hints from
 * corporate pages.
 *
 * All fetch methods are best-effort: on a non-200 response or a thrown
 * exception they return an "empty" result with the same keys instead of
 * propagating the failure.
 */
class SocialScraperService
{
    /**
     * Registrable domains recognised as social platforms, mapped to the
     * platform key reported in results. Subdomains (www., m., ...) match too.
     */
    private const PLATFORM_DOMAINS = [
        'instagram.com' => 'instagram',
        'linkedin.com' => 'linkedin',
        'twitter.com' => 'twitter',
        'x.com' => 'twitter',
        'facebook.com' => 'facebook',
        'tiktok.com' => 'tiktok',
        'youtube.com' => 'youtube',
    ];

    /**
     * CSS custom-property patterns checked, in order, for a primary brand colour.
     */
    private const CSS_COLOR_PATTERNS = [
        '/--primary[-_]?color\s*:\s*([#\w\d]+)/i',
        '/--brand[-_]?color\s*:\s*([#\w\d]+)/i',
        '/--accent[-_]?color\s*:\s*([#\w\d]+)/i',
        '/--color[-_]?primary\s*:\s*([#\w\d]+)/i',
        '/--main[-_]?color\s*:\s*([#\w\d]+)/i',
    ];

    /**
     * Fetch from multiple URLs and merge results.
     * Separates social profiles from corporate websites.
     *
     * Entries that are not strings, are blank, are not valid URLs, or do not
     * use the http/https scheme are skipped silently.
     *
     * @param array $urls Candidate URLs (typically user supplied).
     * @return array{social: array, corporate: array, merged: array}
     */
    public function fetchMultiple(array $urls): array
    {
        $social = [];
        $corporate = [];

        foreach ($urls as $url) {
            if (!is_string($url)) {
                continue;
            }

            $url = trim($url);
            if (!$this->isHttpUrl($url)) {
                continue;
            }

            $platform = $this->detectPlatform($url);
            if ($platform === 'website') {
                $corporate[] = $this->fetchWebsiteData($url);
            } else {
                $social[] = $this->fetchSocialData($url, $platform);
            }
        }

        return [
            'social' => $social,
            'corporate' => $corporate,
            'merged' => $this->merge(['social' => $social, 'corporate' => $corporate]),
        ];
    }

    /**
     * Fetch Open Graph data from a social profile URL.
     *
     * The request identifies itself with Facebook's link-preview crawler
     * User-Agent string.
     *
     * @param string      $url      Profile URL to fetch.
     * @param string|null $platform Platform key; detected from the URL when null.
     * @return array{type: string, platform: string, url: string, title: string, description: string, image: string, site_name: string}
     */
    public function fetchSocialData(string $url, ?string $platform = null): array
    {
        $platform ??= $this->detectPlatform($url);

        try {
            $html = $this->fetchHtml($url, [
                'User-Agent' => 'facebookexternalhit/1.1 (+http://www.facebook.com/externalhit_uatext.php)',
                'Accept-Language' => 'pt-BR,pt;q=0.9,en;q=0.8',
                'Accept' => 'text/html,application/xhtml+xml',
            ], 10);

            if ($html === null) {
                return $this->emptyResult($url, $platform);
            }

            return [
                'type' => 'social',
                'platform' => $platform,
                'url' => $url,
                'title' => $this->extractOg($html, 'title')
                    ?: $this->extractMeta($html, 'title')
                    ?: $this->extractTag($html, 'title'),
                'description' => $this->extractOg($html, 'description')
                    ?: $this->extractMeta($html, 'description'),
                'image' => $this->extractOg($html, 'image') ?: '',
                'site_name' => $this->extractOg($html, 'site_name') ?: '',
            ];
        } catch (\Exception $e) {
            Log::error("[SocialScraper] Social fetch failed for {$url}: " . $e->getMessage());

            return $this->emptyResult($url, $platform);
        }
    }

    /**
     * Fetch corporate website data: title, description, image, keywords,
     * author, and brand-colour hints found in the page HTML.
     *
     * @param string $url Website URL to fetch.
     * @return array{type: string, platform: string, url: string, title: string, description: string, image: string, colors: array, keywords: string, author: string}
     */
    public function fetchWebsiteData(string $url): array
    {
        try {
            $html = $this->fetchHtml($url, [
                'User-Agent' => 'Mozilla/5.0 (compatible; CardAoSegundoBot/1.0)',
                'Accept' => 'text/html,application/xhtml+xml',
            ], 12);

            if ($html === null) {
                return $this->emptyCorporateResult($url);
            }

            return [
                'type' => 'corporate',
                'platform' => 'website',
                'url' => $url,
                'title' => $this->extractOg($html, 'title') ?: $this->extractTag($html, 'title'),
                'description' => $this->extractOg($html, 'description')
                    ?: $this->extractMeta($html, 'description'),
                'image' => $this->extractOg($html, 'image') ?: '',
                'colors' => $this->extractBrandColors($html),
                'keywords' => $this->extractMeta($html, 'keywords'),
                'author' => $this->extractMeta($html, 'author'),
            ];
        } catch (\Exception $e) {
            Log::error("[SocialScraper] Website fetch failed for {$url}: " . $e->getMessage());

            return $this->emptyCorporateResult($url);
        }
    }

    /**
     * Legacy single-URL fetch for backward compatibility.
     *
     * Dispatches to the website or social fetcher based on the detected platform.
     */
    public function fetch(string $url): array
    {
        $platform = $this->detectPlatform($url);

        return $platform === 'website'
            ? $this->fetchWebsiteData($url)
            : $this->fetchSocialData($url, $platform);
    }

    /**
     * Perform the GET request and return the body, or null when the response
     * status is not 200. Exceptions are left to the caller so each fetcher
     * can log with its own message.
     *
     * @param array<string, string> $headers Request headers.
     * @param int                   $timeout Timeout in seconds.
     */
    private function fetchHtml(string $url, array $headers, int $timeout): ?string
    {
        $response = Http::withHeaders($headers)->timeout($timeout)->get($url);

        return $response->ok() ? $response->body() : null;
    }

    /**
     * Extract brand-colour hints from HTML/CSS.
     *
     * Keys set when found (in this order):
     *  - theme_color:   content of <meta name="theme-color">
     *  - tile_color:    content of <meta name="msapplication-TileColor">
     *  - css_primary:   first valid hex among the known CSS custom properties
     *  - body_bg:       hex background declared inline on <body> or <header>
     *  - _manifest_url: href of <link rel="manifest">, stored for a potential
     *                   follow-up fetch (not a colour)
     *
     * @return array<string, string>
     */
    private function extractBrandColors(string $html): array
    {
        $colors = [];

        // 1. theme-color meta tag. Only simple values (hex, names, comma lists) are accepted.
        $themeColor = $this->extractTagAttribute($html, 'meta', 'name', 'theme-color', 'content');
        if ($themeColor !== '' && preg_match('/^[#\w,\s]+$/', $themeColor) === 1) {
            $colors['theme_color'] = $themeColor;
        }

        // 2. msapplication-TileColor (Windows tiles).
        $tileColor = $this->extractTagAttribute($html, 'meta', 'name', 'msapplication-TileColor', 'content');
        if ($tileColor !== '' && preg_match('/^[#\w]+$/', $tileColor) === 1) {
            $colors['tile_color'] = $tileColor;
        }

        // 3. CSS custom properties (--primary-color, --brand-color, etc.); first valid hex wins.
        foreach (self::CSS_COLOR_PATTERNS as $pattern) {
            if (preg_match($pattern, $html, $m) !== 1) {
                continue;
            }
            $hex = trim($m[1]);
            if ($this->isValidHex($hex)) {
                $colors['css_primary'] = $hex;
                break;
            }
        }

        // 4. Inline style background / background-color on body or header.
        $inlineBg = '/<(?:body|header)[^>]+style=["\'][^"\']*background(?:-color)?\s*:\s*([#\w\d]+)/i';
        if (preg_match($inlineBg, $html, $m) === 1) {
            $hex = trim($m[1]);
            if ($this->isValidHex($hex)) {
                $colors['body_bg'] = $hex;
            }
        }

        // 5. Linked web-app manifest; only the URL is recorded here, nothing is fetched.
        $manifest = $this->extractTagAttribute($html, 'link', 'rel', 'manifest', 'href');
        if ($manifest !== '') {
            $colors['_manifest_url'] = $manifest;
        }

        return $colors;
    }

    /**
     * Merge social + corporate results into a unified analysis input.
     *
     * The first social entry supplies name/bio; the first corporate entry
     * supplies brand name/description. Colours from all corporate entries are
     * merged, later entries overriding earlier ones on the same key.
     *
     * @param array{social?: array, corporate?: array} $sources
     * @return array{name: string, bio_hint: string, brand_name: string, brand_desc: string, brand_colors: array, platforms: array, sources: int}
     */
    private function merge(array $sources): array
    {
        $social = $sources['social'] ?? [];
        $corporate = $sources['corporate'] ?? [];

        $allColors = [];
        foreach ($corporate as $corp) {
            $allColors = array_merge($allColors, $corp['colors'] ?? []);
        }

        $primarySocial = $social[0] ?? [];
        $primaryCorporate = $corporate[0] ?? [];

        // Failed fetches yield '' (not null), so the fallback must trigger on
        // an empty string as well as on a missing key.
        $name = $primarySocial['title'] ?? '';
        if ($name === '') {
            $name = $primaryCorporate['author'] ?? '';
        }

        return [
            'name' => $name,
            'bio_hint' => $primarySocial['description'] ?? '',
            'brand_name' => $primaryCorporate['title'] ?? '',
            'brand_desc' => $primaryCorporate['description'] ?? '',
            'brand_colors' => $allColors,
            'platforms' => array_map(static fn ($s) => $s['platform'], $social),
            'sources' => count($social) + count($corporate),
        ];
    }

    // Helpers

    /**
     * Content of <meta property="og:{$property}">, or '' when absent.
     */
    private function extractOg(string $html, string $property): string
    {
        return $this->extractTagAttribute($html, 'meta', 'property', 'og:' . $property, 'content');
    }

    /**
     * Content of <meta name="{$name}">, or '' when absent.
     */
    private function extractMeta(string $html, string $name): string
    {
        return $this->extractTagAttribute($html, 'meta', 'name', $name, 'content');
    }

    /**
     * Find the first <$tag> whose $matchAttr equals $matchValue and return its
     * $wantedAttr value, entity-decoded and trimmed; '' when nothing matches.
     *
     * Matching is case-insensitive and works with either attribute order.
     * The value is delimited by a back-reference to its opening quote, so a
     * double-quoted value may contain apostrophes (and vice versa).
     */
    private function extractTagAttribute(
        string $html,
        string $tag,
        string $matchAttr,
        string $matchValue,
        string $wantedAttr
    ): string {
        $open = '<' . preg_quote($tag, '/') . '\b[^>]*?';
        $key = '\s' . preg_quote($matchAttr, '/') . '\s*=\s*["\']' . preg_quote($matchValue, '/') . '["\']';
        // Group 1 = opening quote, group 2 = non-empty value up to the same quote.
        $value = '\s' . preg_quote($wantedAttr, '/') . '\s*=\s*(["\'])((?:(?!\1).)+)\1';

        $patterns = [
            '/' . $open . $key . '[^>]*?' . $value . '/is',
            '/' . $open . $value . '[^>]*?' . $key . '/is',
        ];

        foreach ($patterns as $pattern) {
            if (preg_match($pattern, $html, $m) === 1) {
                return trim(html_entity_decode($m[2], ENT_QUOTES, 'UTF-8'));
            }
        }

        return '';
    }

    /**
     * Text content of the first <$tag>...</$tag> element, with nested markup
     * stripped and HTML entities decoded; '' when absent.
     */
    private function extractTag(string $html, string $tag): string
    {
        $name = preg_quote($tag, '/');

        if (preg_match('/<' . $name . '\b[^>]*>(.*?)<\/' . $name . '\s*>/is', $html, $m) === 1) {
            return trim(html_entity_decode(strip_tags($m[1]), ENT_QUOTES, 'UTF-8'));
        }

        return '';
    }

    /**
     * True for "#rgb" or "#rrggbb" hex colours.
     */
    private function isValidHex(string $color): bool
    {
        return preg_match('/^#([A-Fa-f0-9]{3}|[A-Fa-f0-9]{6})$/', $color) === 1;
    }

    /**
     * True when $url is a syntactically valid URL using the http or https scheme.
     */
    private function isHttpUrl(string $url): bool
    {
        if ($url === '' || filter_var($url, FILTER_VALIDATE_URL) === false) {
            return false;
        }

        $scheme = strtolower((string) parse_url($url, PHP_URL_SCHEME));

        return $scheme === 'http' || $scheme === 'https';
    }

    /**
     * Classify a URL by its host: one of the known social platform keys, or
     * 'website' for everything else.
     *
     * Only the host is inspected, so "netflix.com" is not mistaken for
     * "x.com" and a platform domain appearing in a path or query string does
     * not count.
     */
    private function detectPlatform(string $url): string
    {
        $url = trim($url);
        $host = parse_url($url, PHP_URL_HOST);

        if (!is_string($host) || $host === '') {
            // Scheme-less input such as "instagram.com/user" parses as a bare
            // path; re-parse it as a network-path reference to recover the host.
            $host = parse_url('//' . ltrim($url, '/'), PHP_URL_HOST);
        }

        if (!is_string($host) || $host === '') {
            return 'website';
        }

        $host = strtolower($host);
        foreach (self::PLATFORM_DOMAINS as $domain => $platform) {
            if ($host === $domain || str_ends_with($host, '.' . $domain)) {
                return $platform;
            }
        }

        return 'website';
    }

    /**
     * Placeholder social result returned when a fetch fails.
     */
    private function emptyResult(string $url, string $platform): array
    {
        return [
            'type' => 'social',
            'platform' => $platform,
            'url' => $url,
            'title' => '',
            'description' => '',
            'image' => '',
            'site_name' => '',
        ];
    }

    /**
     * Placeholder corporate result returned when a fetch fails.
     */
    private function emptyCorporateResult(string $url): array
    {
        return [
            'type' => 'corporate',
            'platform' => 'website',
            'url' => $url,
            'title' => '',
            'description' => '',
            'image' => '',
            'colors' => [],
            'keywords' => '',
            'author' => '',
        ];
    }
}