/**
 * Rezume Content Script
 * Extracts job description text from the current page.
 * Injected into all pages — listens for messages from the popup.
 *
 * Strategy:
 *   1. Try known job-board CSS selectors (fast, precise).
 *   2. Fall back to heuristic scoring across all block elements.
 *      Scoring uses:
 *        - Text length (base)
 *        - JD signal-word hits (+10 each)
 *        - SWE tech-keyword hits (+3 each, capped at +60)
 *        - Structural penalties for nav/header/footer/aside (-50)
 *   3. Return a `confidence` field so the popup can show nuanced status.
 */

'use strict';

// ---------------------------------------------------------------------------
// JOB_BOARD_SELECTORS — CSS selectors for known job boards.
//
// Each entry has the shape { host, selectors }:
//   host      – job-board domain checked against window.location.hostname,
//               or null for an entry that is tried on every site.
//   selectors – candidate description containers, most specific first.
//
// Entries are consulted in array order and the first selector that yields
// enough text wins, so the host-less generic entry stays last.
// ---------------------------------------------------------------------------
const JOB_BOARD_SELECTORS = [
  {
    // LinkedIn: several layouts exist depending on login state and rollout.
    host: 'linkedin.com',
    selectors: [
      // Signed-in, full job view
      '.jobs-description__content',
      '.jobs-description-content__text',
      '.jobs-description',
      // Public (signed-out) posting
      '.description__text',
      '.show-more-less-html__markup',
      // 2024+ layouts
      // NOTE(review): the two "top-card" patterns below look like they target
      // the header card rather than the description body — confirm on a live page.
      '[class*="job-details-jobs-unified-top-card"]',
      '[class*="jobs-unified-top-card"]',
      '.jobs-box__html-content',
      '.jobs-description-details',
      // Last resort: anything labelled as the job description
      'section[aria-label*="Job description" i]',
      'div[id*="job-details"]',
    ],
  },
  {
    // Indeed
    host: 'indeed.com',
    selectors: [
      '#jobDescriptionText',
      '.jobsearch-jobDescriptionText',
      '[class*="jobDescription"]',
    ],
  },
  {
    // Greenhouse (postings hosted on greenhouse.io)
    host: 'greenhouse.io',
    selectors: [
      '#content',
      '.job-post',
      '.job__description',
    ],
  },
  {
    // Lever
    host: 'lever.co',
    selectors: [
      '.posting-description',
      '.posting',
      '[class*="posting"]',
    ],
  },
  {
    // Workday
    host: 'myworkdayjobs.com',
    selectors: [
      '[data-automation-id="jobPostingDescription"]',
      '[data-automation-id="job-posting-details"]',
    ],
  },
  {
    // Naukri
    host: 'naukri.com',
    selectors: [
      '.job-desc',
      '.dang-inner-html',
      '[class*="job-description"]',
    ],
  },
  {
    // Wellfound (formerly AngelList)
    host: 'wellfound.com',
    selectors: [
      '.jobPostingDetails',
      '[class*="description"]',
      '[class*="job-post"]',
    ],
  },
  {
    // Glassdoor
    host: 'glassdoor.com',
    selectors: [
      '[class*="jobDescription"]',
      '.desc',
      '[class*="JobDetails"]',
    ],
  },
  {
    // Workable
    host: 'workable.com',
    selectors: [
      '.job-description',
      '[class*="jobDescription"]',
    ],
  },
  {
    // SmartRecruiters
    host: 'smartrecruiters.com',
    selectors: [
      '.job-sections',
      '[class*="job-detail"]',
    ],
  },
  {
    // Ashby
    host: 'ashbyhq.com',
    selectors: [
      '.ashby-job-posting-description',
      '[class*="description"]',
    ],
  },
  {
    // Any other career page: broad class / id / aria patterns.
    host: null,
    selectors: [
      '[class*="job-description"]',
      '[class*="jobDescription"]',
      '[id*="job-description"]',
      '[id*="jobDescription"]',
      '[class*="job-details"]',
      '[id*="job-details"]',
      '[class*="posting-description"]',
      '[class*="job-posting"]',
      '[class*="job_description"]',
      'section[aria-label*="description" i]',
      'div[aria-label*="job description" i]',
      'article[class*="job"]',
    ],
  },
];

// ---------------------------------------------------------------------------
// JD_SIGNAL_WORDS — lowercase headings and phrases that are typical of job
// postings. The heuristic scorer looks each one up in an element's
// lowercased text, so every entry here must itself be lowercase.
// ---------------------------------------------------------------------------
const JD_SIGNAL_WORDS = [
  // --- Typical posting structure ---
  'responsibilities',
  'requirements',
  'qualifications',
  'preferred qualifications',
  'what you will do',
  "what you'll do",
  "what you'll be doing",
  'what you will be doing',
  'about the role',
  'about this role',
  'about the position',
  'we are looking for',
  "we're looking for",
  'must have',
  'nice to have',
  'good to have',
  'skills required',
  'experience required',
  'job description',
  'you will',
  "you'll",
  'your role',
  'your responsibilities',
  'ideal candidate',
  'you are',
  'who you are',
  'we offer',
  'what we offer',
  'benefits',
  'perks',
  'about us',
  'about the team',
  'about the company',
  'join our team',
  'join us',

  // --- Software-engineering phrasing ---
  'technical',
  'software engineer',
  'software engineering',
  'engineering team',
  'full stack',
  'full-stack',
  'backend',
  'back-end',
  'front-end',
  'frontend',
  'software development',
  'software developer',
  'coding',
  'programming',
  'computer science',
  'years of experience',
  'bs in computer science',
  'ms in computer science',
  "bachelor's degree",
  "master's degree",

  // --- Collaboration and process ---
  'collaborate',
  'cross-functional',
  'problem solving',
  'problem-solving',
  'unit testing',
  'code review',
  'pull request',
  'peer review',
  'system design',
  'architecture',
  'scalability',
  'performance',
  'agile',
  'scrum',
  'sprint',
  'stand-up',

  // --- Compensation / offer wording (suggests a real posting, not a blog) ---
  'salary',
  'compensation',
  'equity',
  'stock options',
  'pto',
  'paid time off',
  'health insurance',
  'remote',
  'hybrid',
  'on-site',
  'relocation',
];

// ---------------------------------------------------------------------------
// SWE_TECH_KEYWORDS — flat, lowercase list of software-engineering terms
// (an embedded subset of the server-side KEYWORD_DATABASE).
// It serves two purposes in this script:
//   * boosting the heuristic score of blocks that mention real tech terms;
//   * deriving the extraction confidence from the number of hits.
// ---------------------------------------------------------------------------
const SWE_TECH_KEYWORDS = [
  // --- Languages ---
  'python', 'javascript', 'typescript', 'java',
  'c++', 'golang', 'go', 'rust',
  'ruby', 'php', 'swift', 'kotlin',
  'scala', 'sql', 'html', 'css',
  'bash', 'shell', 'r language', 'matlab',
  'haskell', 'elixir', 'clojure', 'groovy',

  // --- Frameworks and libraries ---
  'react', 'angular', 'vue', 'next.js',
  'nuxt', 'svelte', 'node.js', 'express',
  'django', 'flask', 'fastapi', 'spring boot',
  'spring', '.net', 'asp.net', 'rails',
  'laravel', 'symfony', 'tensorflow', 'pytorch',
  'pandas', 'numpy', 'scikit-learn', 'keras',
  'jquery', 'tailwind',

  // --- Databases ---
  'postgresql', 'postgres', 'mysql', 'mongodb',
  'redis', 'cassandra', 'dynamodb', 'elasticsearch',
  'oracle', 'sqlite', 'neo4j', 'couchdb',
  'influxdb', 'mariadb', 'sql server', 'snowflake',
  'bigquery', 'redshift',

  // --- Cloud and infrastructure ---
  'aws', 'azure', 'gcp', 'google cloud',
  'ec2', 's3', 'lambda', 'rds',
  'cloudformation', 'ecs', 'eks', 'cloudfront',
  'vpc', 'iam', 'heroku', 'digitalocean',
  'vercel', 'netlify',

  // --- DevOps and tooling ---
  'docker', 'kubernetes', 'k8s', 'terraform',
  'ansible', 'jenkins', 'github actions', 'gitlab ci',
  'circleci', 'travis ci', 'helm', 'prometheus',
  'grafana', 'datadog', 'new relic', 'splunk',
  'nginx', 'apache', 'git', 'github',
  'gitlab', 'bitbucket',

  // --- Architecture and patterns ---
  'microservices', 'rest api', 'restful', 'graphql',
  'grpc', 'kafka', 'rabbitmq', 'ci/cd',
  'serverless', 'api gateway', 'load balancing', 'message queue',
  'event-driven', 'service mesh', 'istio', 'caching',
  'cdn',

  // --- Methodologies and practices ---
  'agile', 'scrum', 'kanban', 'devops',
  'sre', 'tdd', 'bdd', 'pair programming',
  'continuous integration', 'continuous deployment', 'gitops',

  // --- Testing ---
  'jest', 'mocha', 'pytest', 'junit',
  'selenium', 'cypress', 'unit testing', 'integration testing',
  'end-to-end testing',

  // --- Role / title wording found inside posting bodies ---
  'software engineer', 'backend engineer', 'frontend engineer', 'full stack engineer',
  'devops engineer', 'data engineer', 'ml engineer', 'mobile engineer',
  'platform engineer', 'infrastructure engineer', 'site reliability engineer',
];

// ---------------------------------------------------------------------------
// cleanText — normalises whitespace in extracted page text.
// ---------------------------------------------------------------------------
/**
 * Normalise line endings and whitespace of scraped text.
 *
 * - CRLF / lone CR become LF.
 * - Runs of spaces, tabs and non-breaking spaces collapse to one space.
 * - Spaces around a line break are dropped, so lines that contain only
 *   whitespace count as blank lines.
 * - At most one blank line (two consecutive newlines) is kept.
 * - Leading and trailing whitespace is trimmed.
 *
 * @param {string|null|undefined} text - Raw text, e.g. from `innerText`.
 * @returns {string} The cleaned text; `''` for null/undefined input.
 */
function cleanText(text) {
  if (text == null) return '';

  return String(text)
    .replace(/\r\n?/g, '\n')             // CRLF and lone CR -> LF
    .replace(/[ \t\u00a0]+/g, ' ')       // collapse spaces / tabs / NBSP
    .replace(/ ?\n ?/g, '\n')            // strip spaces hugging a line break
    .replace(/\n{3,}/g, '\n\n')          // keep at most one blank line
    .trim();
}

// ---------------------------------------------------------------------------
// countTechKeywords — how many distinct tech keywords appear in a text.
// Used for confidence calculation.
// ---------------------------------------------------------------------------
/**
 * Count the keywords that occur in `text` as whole tokens.
 *
 * A keyword only counts when it is not glued to other letters or digits, so
 * 'go' does not match "good" or "algorithm", 'rust' does not match "trust"
 * and 'java' does not match "javascript". A trailing plural "s" is accepted
 * ("rest apis" counts for 'rest api'). Short keywords that are also ordinary
 * English words (e.g. a standalone "go") can still match.
 *
 * Matching is case-insensitive; each keyword is counted at most once.
 *
 * @param {string|null|undefined} text - Text to scan.
 * @param {string[]} [keywords=SWE_TECH_KEYWORDS] - Lowercase keywords to look for.
 * @returns {number} Number of keywords found; 0 for empty/null text.
 */
function countTechKeywords(text, keywords = SWE_TECH_KEYWORDS) {
  if (!text) return 0;

  const lower = String(text).toLowerCase();
  let hits = 0;

  for (const keyword of keywords) {
    // Keywords such as 'c++', '.net' or 'ci/cd' contain regex metacharacters
    // and non-word characters, so escape them and use alphanumeric
    // lookarounds instead of \b (which would not anchor next to '+' or '.').
    const escaped = keyword.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
    const wholeToken = new RegExp(`(?<![a-z0-9])${escaped}s?(?![a-z0-9])`);
    if (wholeToken.test(lower)) hits += 1;
  }

  return hits;
}

// ---------------------------------------------------------------------------
// scoreElement — returns a JD-likelihood score for a DOM element.
// Higher = more likely to be a job description.
// ---------------------------------------------------------------------------
/**
 * Score how likely an element is to contain a job description.
 *
 * The score is the sum of:
 *   - text length / 500 (base);
 *   - +10 for every JD_SIGNAL_WORDS phrase found in the lowercased text;
 *   - +3 per tech keyword reported by countTechKeywords, capped at +60;
 *   - -50 if the tag is nav/header/footer/aside;
 *   - -50 if the ARIA role is navigation/banner/contentinfo.
 * Elements with fewer than 200 characters of text score 0.
 *
 * Tech keywords are counted through countTechKeywords so that scoring and
 * confidence calculation always agree on what a "tech hit" is.
 *
 * @param {Element} el - Candidate element.
 * @returns {number} The score; may be negative for penalised elements.
 */
function scoreElement(el) {
  const MIN_TEXT_LENGTH = 200;
  const SIGNAL_WEIGHT = 10;
  const TECH_WEIGHT = 3;
  const TECH_BONUS_CAP = 60;
  const STRUCTURAL_PENALTY = 50;

  const text = (el.innerText || el.textContent || '').toLowerCase();
  if (text.length < MIN_TEXT_LENGTH) return 0; // too short to be a JD

  // Base: longer text starts with a higher score.
  let score = text.length / 500;

  // Section headings / phrases typical of postings.
  const signalHits = JD_SIGNAL_WORDS.filter((phrase) => text.includes(phrase)).length;
  score += signalHits * SIGNAL_WEIGHT;

  // SWE-specific gate: real postings mention several tech terms, generic
  // blog or landing pages usually do not. Capped so that a long keyword
  // list cannot dominate the score.
  score += Math.min(countTechKeywords(text) * TECH_WEIGHT, TECH_BONUS_CAP);

  // Penalise structural / navigation containers.
  const tag = el.tagName.toLowerCase();
  const role = (el.getAttribute('role') || '').toLowerCase();
  if (['nav', 'header', 'footer', 'aside'].includes(tag)) score -= STRUCTURAL_PENALTY;
  if (['navigation', 'banner', 'contentinfo'].includes(role)) score -= STRUCTURAL_PENALTY;

  return score;
}

// ---------------------------------------------------------------------------
// extractWithKnownSelectors — uses job-board specific CSS selectors.
// ---------------------------------------------------------------------------
/**
 * Try the selectors in JOB_BOARD_SELECTORS and return the first sufficiently
 * long piece of text they yield.
 *
 * Host-specific entries apply only when the current hostname is the entry's
 * domain or one of its sub-domains. Every element matched by a selector is
 * considered, in document order, so a short first match (e.g. a heading
 * wrapper) does not hide the real container further down the page.
 *
 * @returns {string|null} Cleaned text longer than 150 characters, or null
 *   when no selector produced any.
 */
function extractWithKnownSelectors() {
  const MIN_TEXT_LENGTH = 150;
  const currentHost = window.location.hostname.toLowerCase();

  for (const board of JOB_BOARD_SELECTORS) {
    if (board.host && !hostMatchesDomain(currentHost, board.host)) continue;

    for (const selector of board.selectors) {
      let matches;
      try {
        matches = document.querySelectorAll(selector);
      } catch (_) {
        // Deliberate best-effort: skip selectors this browser cannot parse.
        continue;
      }

      for (const el of matches) {
        const text = cleanText(el.innerText || el.textContent || '');
        if (text.length > MIN_TEXT_LENGTH) return text;
      }
    }
  }

  return null;
}

// True when `hostname` is `domain` itself or a sub-domain of it. A plain
// substring test is not enough: "clever.com" contains "lever.co".
function hostMatchesDomain(hostname, domain) {
  return hostname === domain || hostname.endsWith(`.${domain}`);
}

// ---------------------------------------------------------------------------
// extractHeuristic — scans block elements and picks the most JD-like one.
// ---------------------------------------------------------------------------
/**
 * Heuristic fallback: score every visible div/section/article/main element
 * with scoreElement and return the text of the best candidate.
 *
 * An ancestor's text is a superset of its descendants' text, so an
 * unpenalised ancestor never scores lower than what it contains and the
 * outermost page wrapper would always win. To avoid returning the whole
 * page, the search starts at the top-scoring element and then descends to
 * the innermost candidate inside it that still reaches NEAR_BEST_RATIO of
 * the top score.
 *
 * If no candidate scores above 5, the text of the whole body is returned.
 *
 * @returns {string} Cleaned text; `''` when the document has no body.
 */
function extractHeuristic() {
  const MIN_ACCEPTED_SCORE = 5;
  // Share of the top score a nested element must reach to replace its
  // ancestor. Tunable: higher values favour larger containers.
  const NEAR_BEST_RATIO = 0.85;

  const scored = [];
  let topScore = 0;
  let topElement = null;

  // querySelectorAll returns elements in document order, i.e. ancestors
  // always come before their descendants.
  for (const el of document.querySelectorAll('div, section, article, main, [role="main"]')) {
    if (!el.offsetParent && el.tagName !== 'BODY') continue; // not rendered
    const score = scoreElement(el);
    if (score <= 0) continue;
    scored.push({ el, score });
    if (score > topScore) {
      topScore = score;
      topElement = el;
    }
  }

  if (topElement && topScore > MIN_ACCEPTED_SCORE) {
    const threshold = topScore * NEAR_BEST_RATIO;
    let chosen = topElement;
    // Thanks to document order a single pass walks down the containment
    // chain: each near-best element nested in the current choice replaces it.
    for (const { el, score } of scored) {
      if (score >= threshold && chosen.contains(el)) chosen = el;
    }
    return cleanText(chosen.innerText || chosen.textContent || '');
  }

  // Last resort: full body
  const body = document.body;
  return cleanText(body ? body.innerText || body.textContent || '' : '');
}

// ---------------------------------------------------------------------------
// calcConfidence — turns the tech-keyword hit count into a confidence label
// that the popup can show to indicate extraction quality.
// ---------------------------------------------------------------------------
/**
 * @param {string} text - Extracted job-description text.
 * @param {string} method - 'targeted-selector' or any other value (treated
 *   as heuristic extraction).
 * @returns {'high'|'medium'|'low'} Confidence label.
 */
function calcConfidence(text, method) {
  const hits = countTechKeywords(text);

  // A known job-board container is already strong evidence, so it needs
  // fewer keyword hits than a heuristic guess to reach the same label.
  const [highAt, mediumAt] = method === 'targeted-selector' ? [2, 1] : [6, 3];

  if (hits >= highAt) return 'high';
  if (hits >= mediumAt) return 'medium';
  return 'low';
}

// ---------------------------------------------------------------------------
// extractJobDescription — main entry point of the extraction pipeline.
// ---------------------------------------------------------------------------
/**
 * Extract the job description from the current page.
 *
 * Known job-board selectors are tried first; the heuristic scan runs only
 * when they produce nothing.
 *
 * @returns {{text: string, method: string, confidence: string, url: string, title: string}}
 *   The extracted text, how it was obtained ('targeted-selector' or
 *   'heuristic'), its confidence label, and the page URL and title.
 */
function extractJobDescription() {
  let method = 'targeted-selector';
  let text = extractWithKnownSelectors();

  if (!text) {
    method = 'heuristic';
    text = extractHeuristic();
  }

  return {
    text,
    method,
    confidence: calcConfidence(text, method),
    url: window.location.href,
    title: document.title,
  };
}

// ---------------------------------------------------------------------------
// Message listener — popup sends { action: 'extractJD' }
// ---------------------------------------------------------------------------
/**
 * Handle a runtime message.
 *
 * Only `{ action: 'extractJD' }` is answered. The reply is either
 * `{ success: true, ...extractionResult }` or `{ success: false, error }`.
 *
 * The response is always sent synchronously, so the listener returns false.
 * Returning true would tell the browser that an asynchronous response is
 * still coming and keep the channel open — including for messages this
 * script never answers.
 *
 * @param {*} message - Message sent through the extension runtime.
 * @param {*} _sender - Sender information (unused).
 * @param {function(Object): void} sendResponse - Callback used to reply.
 * @returns {boolean} Always false (no asynchronous response pending).
 */
function handleRuntimeMessage(message, _sender, sendResponse) {
  if (!message || message.action !== 'extractJD') return false;

  try {
    const result = extractJobDescription();
    sendResponse({ success: true, ...result });
  } catch (err) {
    // Anything can be thrown; only Error objects are guaranteed a .message.
    const error = err instanceof Error ? err.message : String(err);
    sendResponse({ success: false, error });
  }
  return false;
}

// The extension API is absent when this file is loaded outside an extension
// context (e.g. by a unit-test runner), so register only when it exists.
if (typeof chrome !== 'undefined' && chrome.runtime && chrome.runtime.onMessage) {
  chrome.runtime.onMessage.addListener(handleRuntimeMessage);
}
