# robots.txt — Claro Perú | AI/GEO Ready
# Last updated: 2026-05-20
# Tuned for traditional SEO + AEO/GEO (LLMs, AI Search, AI Overviews)
#
# Discovery guide for LLMs: /llms.txt
# Official site: https://www.claro.com.pe/
#
# Format notes:
#   - One record per line; robots.txt is a line-oriented format.
#   - No blank lines inside a group (some legacy parsers treat a blank line
#     as the end of the group); bare "#" lines are used as visual separators.
#   - A crawler obeys only the single most specific User-agent group that
#     matches it; the "*" group is used only when no named group matches.

# =============================================================
# 1. General access — traditional SEO
# =============================================================
User-agent: *
Allow: /
#
# Technical / private / non-canonical areas
Disallow: /wps/
Disallow: /portal/pe/
Disallow: /portal/pe/recursos_portlets/
Disallow: /portal/pe/recursos_tema/
Disallow: /portal/pe/recursos_contenido/
Disallow: /api/
Disallow: /apps-react/
Disallow: /personas2/
Disallow: /empresas2/
Disallow: /personas-b/
Disallow: /conocimiento-ai/
#
# Internal search and utility endpoints
Disallow: /buscador/
Disallow: /buscador
Disallow: /*?s=
Disallow: /*?q=
Disallow: /*?utm_*
Disallow: /*&utm_*
#
# Thank-you / confirmation pages (no value for search engines)
Disallow: /gracias-*/
Disallow: /migajear_gracias/
Disallow: /constancia-del-registro-de-la-solicitud-de-envio-de-publicidad/
#
# Sandboxes, demos, tests and duplicates detected in the sitemap
Disallow: /test-*/
Disallow: /test-search/
Disallow: /test-search-amx/
Disallow: /test-personas_csa/
Disallow: /test-componentes/
Disallow: /test-componentes-cms/
Disallow: /testpage/
Disallow: /latam-test/
Disallow: /claro-informado_test/
Disallow: /demo-componentes/
Disallow: /making-of/
Disallow: /error/
#
# Backup versions / "_1" variants / experimental pages
Disallow: /*_backup/
Disallow: /*_backup$
Disallow: /renovacion_backup/
Disallow: /formulario-cac-colaborador_backup/
Disallow: /personas/movil/activa-chip_backup/
Disallow: /personas/beneficios/movil/prestame-saldo_backup/
Disallow: /5g_1/
Disallow: /traficoenvivo_1/
Disallow: /roaming_1/
Disallow: /mide-tu-velocidad_1/
Disallow: /personas/movil/prepago/crece-tu-recarga_1/
Disallow: /personas/movil/prepago/control-automatico-v3/
Disallow: /personas/movil/prepago/bono-provincia2/
Disallow: /personas/movil/postpago-a/
Disallow: /personas/movil/postpago-b/
Disallow: /empresas/movil/telefonia-movil/recargas_old/
#
# Assets that should always be fetchable (correct rendering + Open Graph)
# NOTE(review): rule precedence is longest-match, so these short patterns do
# not override a longer Disallow such as /portal/pe/recursos_tema/. Assets
# below a disallowed path need an Allow at least as long as that Disallow
# (as the two favicon / icon-touch rules at the end of this list do).
Allow: /*.css$
Allow: /*.js$
Allow: /*.svg$
Allow: /*.png$
Allow: /*.jpg$
Allow: /*.jpeg$
Allow: /*.webp$
Allow: /*.gif$
Allow: /*.ico$
Allow: /*.woff$
Allow: /*.woff2$
# Path patterns must begin with "/"; a leading "*" is not a valid pattern.
Allow: /*favicon.*$
Allow: /*icon-touch*.*$
Allow: /portal/pe/recursos_tema/assets/img/favicon.*$
Allow: /portal/pe/recursos_tema/assets/img/icon-touch*.*$
#
# Key resources for AI discovery
Allow: /llms.txt
Allow: /sitemap.xml
Allow: /robots.txt
#
# Courtesy rate limit. Non-standard and ignored by most major crawlers, but
# kept for legacy bots. It must live inside a User-agent group to have any
# effect, so it is declared here rather than as a free-standing record.
Crawl-delay: 1

# =============================================================
# 2. Traditional search-engine crawlers
# =============================================================
# NOTE(review): because a crawler obeys only its most specific group, every
# agent listed here with "Allow: /" ignores ALL Disallow rules of the
# "User-agent: *" group in section 1. Confirm this is intended; if these
# engines should honour section 1, move their User-agent lines into that group.
User-agent: Googlebot
Allow: /

User-agent: Googlebot-Image
Allow: /

User-agent: Googlebot-News
Allow: /

User-agent: Googlebot-Video
Allow: /

User-agent: Bingbot
Allow: /

User-agent: DuckDuckBot
Allow: /

User-agent: Slurp
Allow: /

User-agent: YandexBot
Allow: /

User-agent: Baiduspider
Allow: /

# =============================================================
# 3. Generative-AI crawlers — model training
#    (Allows the AI-curated corpus /conocimiento-ai/ without blocking
#    the rest of the site)
# =============================================================
# Google AI (Bard / Gemini / AI Overviews — training)
User-agent: Google-Extended
Allow: /

# OpenAI (GPT model training)
User-agent: GPTBot
Allow: /

# Anthropic (Claude model training)
User-agent: ClaudeBot
Allow: /

User-agent: anthropic-ai
Allow: /

# Perplexity (training)
User-agent: PerplexityBot
Allow: /

# Common Crawl (datasets used by multiple LLMs)
User-agent: CCBot
Allow: /

# Apple Intelligence (training)
User-agent: Applebot-Extended
Allow: /

# Meta AI (training)
User-agent: Meta-ExternalAgent
Allow: /

User-agent: FacebookBot
Allow: /

# Amazon (Alexa / Amazon model training)
User-agent: Amazonbot
Allow: /

# Cohere
User-agent: cohere-ai
Allow: /

# Mistral AI
User-agent: MistralAI-User
Allow: /

# You.com
User-agent: YouBot
Allow: /

# Diffbot
User-agent: Diffbot
Allow: /

# ByteDance (TikTok / Doubao). Allowed by default; switch to Disallow if the
# brand decides to opt out.
User-agent: Bytespider
Allow: /

# =============================================================
# 4. Real-time AI crawlers (AI Search / user-triggered browsing)
#    Important for AI Overviews, ChatGPT Search, Perplexity, etc.
# =============================================================
# OpenAI — ChatGPT Search (real-time retrieval)
User-agent: OAI-SearchBot
Allow: /

# OpenAI — ChatGPT user with browsing
User-agent: ChatGPT-User
Allow: /

# Anthropic — Claude with browsing
User-agent: Claude-Web
Allow: /

User-agent: Claude-User
Allow: /

User-agent: Claude-SearchBot
Allow: /

# Perplexity — user-triggered fetches
User-agent: Perplexity-User
Allow: /

# Microsoft Copilot
User-agent: CopilotBot
Allow: /

# DuckDuckGo Assist
User-agent: DuckAssistBot
Allow: /

# =============================================================
# 5. Unwanted bots / aggressive scrapers (explicit block)
# =============================================================
User-agent: AhrefsBot
Disallow: /

User-agent: SemrushBot
Disallow: /

User-agent: MJ12bot
Disallow: /

User-agent: DotBot
Disallow: /

User-agent: PetalBot
Disallow: /

User-agent: SeekportBot
Disallow: /

User-agent: BLEXBot
Disallow: /

# =============================================================
# 6. Courtesy and rate limit
# =============================================================
# "Crawl-delay: 1" is declared at the end of the "User-agent: *" group in
# section 1. Placed here, after the last blocked-bot group, it would attach
# to that fully disallowed bot (or to no group at all) and never take effect.

# =============================================================
# 7. Sitemaps
# =============================================================
Sitemap: https://www.claro.com.pe/sitemap.xml
Sitemap: https://www.claro.com.pe/conocimiento-ai/sitemap-llms.xml

# AI hint (non-standard, read by some GEO crawlers):
# LLMs: https://www.claro.com.pe/llms.txt