Al-HUWAITI Shell
Al-huwaiti


Server : LiteSpeed
System : Linux server335.web-hosting.com 4.18.0-553.62.1.lve.el8.x86_64 #1 SMP Mon Jul 21 17:50:35 UTC 2025 x86_64
User : cardxfeb ( 2452)
PHP Version : 8.1.34
Disable Function : NONE
Directory :  /home/cardxfeb/www/app/Services/

Upload File :
current_dir [ Writeable ] document_root [ Writeable ]

 

Current File : /home/cardxfeb/www/app/Services/SocialScraperService.php
<?php

namespace App\Services;

use Illuminate\Support\Facades\Http;
use Illuminate\Support\Facades\Log;

/**
 * Scrapes lightweight profile/brand metadata (Open Graph tags, meta tags,
 * <title>, and colour hints) from social profile pages and corporate websites.
 *
 * All HTML inspection is regex based and best-effort: a failed or non-200
 * fetch yields an "empty" result with the same keys instead of an exception.
 */
class SocialScraperService
{
    /**
     * Host names of the supported social platforms, mapped to the platform
     * key reported in results. A URL matches when its host equals the domain
     * or is a sub-domain of it.
     */
    private const PLATFORM_DOMAINS = [
        'instagram.com' => 'instagram',
        'linkedin.com'  => 'linkedin',
        'twitter.com'   => 'twitter',
        'x.com'         => 'twitter',
        'facebook.com'  => 'facebook',
        'tiktok.com'    => 'tiktok',
        'youtube.com'   => 'youtube',
    ];

    /**
     * Fetch from multiple URLs and merge results.
     * Separates social profiles from corporate websites.
     *
     * Entries that are not strings, are blank, fail URL validation, or do not
     * use the http/https scheme are skipped silently.
     *
     * @param array $urls Candidate URLs (typically user supplied).
     * @return array{social: array, corporate: array, merged: array}
     */
    public function fetchMultiple(array $urls): array
    {
        $sources = ['social' => [], 'corporate' => []];

        foreach ($urls as $url) {
            if (!is_string($url)) {
                continue;
            }

            $url = trim($url);
            if ($url === '' || !$this->isFetchableUrl($url)) {
                continue;
            }

            $platform = $this->detectPlatform($url);

            if ($platform === 'website') {
                $sources['corporate'][] = $this->fetchWebsiteData($url);
            } else {
                $sources['social'][] = $this->fetchSocialData($url, $platform);
            }
        }

        return [
            'social'    => $sources['social'],
            'corporate' => $sources['corporate'],
            'merged'    => $this->merge($sources),
        ];
    }

    /**
     * Fetch Open Graph data from a social profile URL.
     *
     * @param string      $url      Profile URL to request.
     * @param string|null $platform Platform key; detected from the URL when null.
     * @return array Keys: type, platform, url, title, description, image, site_name.
     *               All text fields are '' when the request fails or is not HTTP 200.
     */
    public function fetchSocialData(string $url, ?string $platform = null): array
    {
        $platform ??= $this->detectPlatform($url);

        try {
            $response = Http::withHeaders([
                'User-Agent'      => 'facebookexternalhit/1.1 (+http://www.facebook.com/externalhit_uatext.php)',
                'Accept-Language' => 'pt-BR,pt;q=0.9,en;q=0.8',
                'Accept'          => 'text/html,application/xhtml+xml',
            ])->timeout(10)->get($url);

            if (!$response->ok()) {
                return $this->emptyResult($url, $platform);
            }

            $html = $response->body();

            return [
                'type'        => 'social',
                'platform'    => $platform,
                'url'         => $url,
                'title'       => $this->extractOg($html, 'title') ?: $this->extractMeta($html, 'title') ?: $this->extractTag($html, 'title'),
                'description' => $this->extractOg($html, 'description') ?: $this->extractMeta($html, 'description'),
                'image'       => $this->extractOg($html, 'image') ?: '',
                'site_name'   => $this->extractOg($html, 'site_name') ?: '',
            ];
        } catch (\Exception $e) {
            Log::error("[SocialScraper] Social fetch failed for {$url}: " . $e->getMessage());
            return $this->emptyResult($url, $platform);
        }
    }

    /**
     * Fetch corporate website data: title, description, image, keywords,
     * author, and brand colour hints found in the page markup.
     *
     * @param string $url Website URL to request.
     * @return array Keys: type, platform, url, title, description, image, colors,
     *               keywords, author. Empty values when the request fails or is
     *               not HTTP 200.
     */
    public function fetchWebsiteData(string $url): array
    {
        try {
            $response = Http::withHeaders([
                'User-Agent' => 'Mozilla/5.0 (compatible; CardAoSegundoBot/1.0)',
                'Accept'     => 'text/html,application/xhtml+xml',
            ])->timeout(12)->get($url);

            if (!$response->ok()) {
                return $this->emptyCorporateResult($url);
            }

            $html = $response->body();

            return [
                'type'        => 'corporate',
                'platform'    => 'website',
                'url'         => $url,
                'title'       => $this->extractOg($html, 'title') ?: $this->extractTag($html, 'title'),
                'description' => $this->extractOg($html, 'description') ?: $this->extractMeta($html, 'description'),
                'image'       => $this->extractOg($html, 'image') ?: '',
                'colors'      => $this->extractBrandColors($html),
                'keywords'    => $this->extractMeta($html, 'keywords'),
                'author'      => $this->extractMeta($html, 'author'),
            ];
        } catch (\Exception $e) {
            Log::error("[SocialScraper] Website fetch failed for {$url}: " . $e->getMessage());
            return $this->emptyCorporateResult($url);
        }
    }

    /**
     * Collect brand colour hints from HTML/inline CSS.
     *
     * Each detected source is stored under its own key; no source overrides
     * another, so callers decide which one to prefer:
     *  - theme_color   : <meta name="theme-color"> content (not hex-validated)
     *  - tile_color    : <meta name="msapplication-TileColor"> content (not hex-validated)
     *  - css_primary   : first valid hex among common CSS custom properties
     *  - body_bg       : valid hex inline background on <body>/<header>
     *  - _manifest_url : raw href of the linked web manifest (may be relative;
     *                    it is not fetched here)
     *
     * @return array<string, string>
     */
    private function extractBrandColors(string $html): array
    {
        $colors = [];

        // 1. theme-color meta tag
        if (preg_match('/<meta[^>]+name=["\']theme-color["\'][^>]+content=["\']([#\w\d,\s]+)["\']/i', $html, $m)) {
            $colors['theme_color'] = trim($m[1]);
        }

        // 2. msapplication-TileColor (Windows tiles)
        if (preg_match('/<meta[^>]+name=["\']msapplication-TileColor["\'][^>]+content=["\']([#\w\d]+)["\']/i', $html, $m)) {
            $colors['tile_color'] = trim($m[1]);
        }

        // 3. CSS custom properties (--primary-color, --brand-color, etc.);
        //    the first pattern that yields a valid hex colour wins.
        $cssPatterns = [
            '/--primary[-_]?color\s*:\s*([#\w\d]+)/i',
            '/--brand[-_]?color\s*:\s*([#\w\d]+)/i',
            '/--accent[-_]?color\s*:\s*([#\w\d]+)/i',
            '/--color[-_]?primary\s*:\s*([#\w\d]+)/i',
            '/--main[-_]?color\s*:\s*([#\w\d]+)/i',
        ];
        foreach ($cssPatterns as $pattern) {
            if (preg_match($pattern, $html, $m)) {
                $hex = trim($m[1]);
                if ($this->isValidHex($hex)) {
                    $colors['css_primary'] = $hex;
                    break;
                }
            }
        }

        // 4. Inline style background / background-color on body/header
        if (preg_match('/<(?:body|header)[^>]+style=["\'][^"\']*background(?:-color)?\s*:\s*([#\w\d]+)/i', $html, $m)) {
            $hex = trim($m[1]);
            if ($this->isValidHex($hex)) {
                $colors['body_bg'] = $hex;
            }
        }

        // 5. Linked web manifest: keep the href for a potential follow-up fetch
        if (preg_match('/<link[^>]+rel=["\']manifest["\'][^>]+href=["\']([^"\']+)["\']/i', $html, $m)) {
            $colors['_manifest_url'] = $m[1];
        }

        return $colors;
    }

    /**
     * Merge social + corporate results into a unified analysis input.
     *
     * Only the first social and first corporate entries feed the text fields;
     * colours are merged across all corporate entries (later entries override
     * earlier ones on the same key).
     *
     * @param array $sources Array with optional 'social' and 'corporate' lists.
     * @return array Keys: name, bio_hint, brand_name, brand_desc, brand_colors,
     *               platforms, sources.
     */
    private function merge(array $sources): array
    {
        $social    = $sources['social']    ?? [];
        $corporate = $sources['corporate'] ?? [];

        $allColors = [];
        foreach ($corporate as $corp) {
            $allColors = array_merge($allColors, $corp['colors'] ?? []);
        }

        $primarySocial = $social[0] ?? [];

        // A failed social fetch still carries title => '', so test for an
        // empty string rather than null before falling back to the author.
        $socialTitle = $primarySocial['title'] ?? '';
        $name        = $socialTitle !== '' ? $socialTitle : ($corporate[0]['author'] ?? '');

        return [
            'name'         => $name,
            'bio_hint'     => $primarySocial['description'] ?? '',
            'brand_name'   => $corporate[0]['title']        ?? '',
            'brand_desc'   => $corporate[0]['description']  ?? '',
            'brand_colors' => $allColors,
            'platforms'    => array_map(static fn ($s) => $s['platform'], $social),
            'sources'      => count($social) + count($corporate),
        ];
    }

    //  Helpers

    /**
     * Whether a URL is syntactically valid and uses http or https.
     *
     * NOTE(review): hosts that resolve to loopback/private/link-local addresses
     * are NOT rejected here; if $urls is user supplied, consider an allow-list
     * or DNS-level check to prevent server-side request forgery.
     */
    private function isFetchableUrl(string $url): bool
    {
        if (filter_var($url, FILTER_VALIDATE_URL) === false) {
            return false;
        }

        $scheme = strtolower((string) parse_url($url, PHP_URL_SCHEME));

        return $scheme === 'http' || $scheme === 'https';
    }

    /**
     * Return the content of <meta property="og:{$property}">, or '' if absent.
     */
    private function extractOg(string $html, string $property): string
    {
        return $this->extractMetaContent($html, 'property', 'og:' . $property);
    }

    /**
     * Return the content of <meta name="{$name}">, or '' if absent.
     */
    private function extractMeta(string $html, string $name): string
    {
        return $this->extractMetaContent($html, 'name', $name);
    }

    /**
     * Find a <meta> tag whose $attribute equals $value and return its decoded,
     * trimmed content attribute. Both attribute orders are supported.
     *
     * @param string $attribute Attribute that identifies the tag ('name' or 'property').
     * @param string $value     Expected attribute value (matched case-insensitively).
     */
    private function extractMetaContent(string $html, string $attribute, string $value): string
    {
        $key = preg_quote($value, '/');

        // Branch-reset group: the value is always capture 1, and it ends only
        // at the SAME quote character that opened it, so content="John's" is
        // not cut off at the apostrophe.
        $content = '(?|"([^"]+)"|\'([^\']+)\')';

        $patterns = [
            '/<meta[^>]+' . $attribute . '=["\']' . $key . '["\'][^>]+content=' . $content . '/is',
            '/<meta[^>]+content=' . $content . '[^>]+' . $attribute . '=["\']' . $key . '["\']/is',
        ];

        foreach ($patterns as $pattern) {
            if (preg_match($pattern, $html, $m) === 1) {
                return trim(html_entity_decode($m[1], ENT_QUOTES, 'UTF-8'));
            }
        }

        return '';
    }

    /**
     * Return the text content of the first <$tag>...</$tag> element with any
     * nested markup stripped and HTML entities decoded, or '' if absent.
     */
    private function extractTag(string $html, string $tag): string
    {
        $name = preg_quote($tag, '/');

        if (preg_match('/<' . $name . '\b[^>]*>\s*(.*?)\s*<\/' . $name . '>/is', $html, $m)) {
            // Decode entities so the result is encoded like the OG/meta paths.
            return trim(html_entity_decode(strip_tags($m[1]), ENT_QUOTES, 'UTF-8'));
        }

        return '';
    }

    /**
     * Whether $color is a 3- or 6-digit hex colour with a leading '#'.
     */
    private function isValidHex(string $color): bool
    {
        return (bool) preg_match('/^#([A-Fa-f0-9]{3}|[A-Fa-f0-9]{6})$/', $color);
    }

    /**
     * Classify a URL as one of the known social platforms or 'website'.
     *
     * Matching is done on the URL's host only, so unrelated domains that merely
     * contain a platform name (e.g. "dropbox.com" contains "x.com") or mention
     * it in the path/query are reported as 'website'.
     */
    private function detectPlatform(string $url): string
    {
        $host = parse_url($url, PHP_URL_HOST);

        if (!is_string($host) || $host === '') {
            // Scheme-less input such as "instagram.com/user": retry as a
            // network-path reference so the host can still be recognised.
            $host = parse_url('//' . ltrim($url, '/'), PHP_URL_HOST);
        }

        if (!is_string($host) || $host === '') {
            return 'website';
        }

        $host = strtolower($host);

        foreach (self::PLATFORM_DOMAINS as $domain => $platform) {
            if ($host === $domain || str_ends_with($host, '.' . $domain)) {
                return $platform;
            }
        }

        return 'website';
    }

    /**
     * Placeholder social result used when a fetch fails.
     */
    private function emptyResult(string $url, string $platform): array
    {
        return [
            'type'        => 'social',
            'platform'    => $platform,
            'url'         => $url,
            'title'       => '',
            'description' => '',
            'image'       => '',
            'site_name'   => '',
        ];
    }

    /**
     * Placeholder corporate result used when a fetch fails.
     */
    private function emptyCorporateResult(string $url): array
    {
        return [
            'type'        => 'corporate',
            'platform'    => 'website',
            'url'         => $url,
            'title'       => '',
            'description' => '',
            'image'       => '',
            'colors'      => [],
            'keywords'    => '',
            'author'      => '',
        ];
    }

    /**
     * Legacy single-URL fetch kept for backward compatibility: dispatches to
     * the website or social fetcher based on the detected platform.
     */
    public function fetch(string $url): array
    {
        $platform = $this->detectPlatform($url);

        return $platform === 'website'
            ? $this->fetchWebsiteData($url)
            : $this->fetchSocialData($url, $platform);
    }
}

Al-HUWAITI Shell