diff --git a/AGENTS.md b/AGENTS.md index 0c8fa58..7f8f68e 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -30,6 +30,14 @@ Forbidden: * Inferred intent * Heuristic task selection +Allowed (explicit exception): + +* Auto‑generated or inferred selectors, but **only** when they resolve to one of the following concrete patterns: + * `{ kind: "css"; selector: string }` + * `{ kind: "cssAll"; selector: string; index: number }` + * `{ kind: "textScope"; text: string }` + * `{ kind: "anchoredCss"; anchor: { kind: "textScope"; text: string }; selector: string }` + If a change violates this, it is **out of scope**. --- @@ -879,4 +887,3 @@ Legacy convenience is not a justification for ambiguity. > The tool must always make it obvious *what will run*, *with what*, and *why*. Agents exist to preserve this clarity — not to smooth it away. - diff --git a/sitecompanion/background.js b/sitecompanion/background.js index 3cf6826..d52524c 100644 --- a/sitecompanion/background.js +++ b/sitecompanion/background.js @@ -1,17 +1,4 @@ -const DEFAULT_TASKS = [ - { - id: "task-generic-fit", - name: "Generic Fit", - text: - "You should evaluate for my fit to the job. You don't need to suggest interview prep, we'll leave those for later. a bit of tuning to your answers: please keep things more compact, a single section for the evaluation is enough, you don't need to analyze every bullet point in the posting." - }, - { - id: "task-ratings-only", - name: "Ratings Only", - text: - "Give ratings out of 10 with headings and do not include any other text.\n\n1. Fit evaluation: my fit to the role.\n2. Company status: how well this company offers career development for me.\n3. Pay: use $25 CAD per hour as a baseline; rate the compensation." - } -]; +const DEFAULT_TASKS = []; const DEFAULT_SETTINGS = { apiKey: "", @@ -25,9 +12,8 @@ const DEFAULT_SETTINGS = { apiBaseUrl: "https://api.openai.com/v1", apiKeyHeader: "Authorization", apiKeyPrefix: "Bearer ", - model: "gpt-4o-mini", - systemPrompt: - "You are a precise, honest assistant. Be concise and avoid inventing details, be critical about evaluations. You should put in a small summary of all the sections at the end. You should answer in no longer than 3 sections including the summary. And remember to bold or italicize key points.", + model: "gpt-5.2", + systemPrompt: "", tasks: DEFAULT_TASKS, shortcuts: [], theme: "system", diff --git a/sitecompanion/content.js b/sitecompanion/content.js index cc5152f..4fdd162 100644 --- a/sitecompanion/content.js +++ b/sitecompanion/content.js @@ -1,11 +1,12 @@ function findMinimumScope(text) { if (!text) return null; - const normalized = text.trim(); + const normalized = normalizeWhitespace(text); if (!normalized) return null; const walker = document.createTreeWalker(document.body, NodeFilter.SHOW_ELEMENT, { acceptNode: (node) => { - if (node.innerText.includes(normalized)) { + const nodeText = normalizeWhitespace(node.innerText); + if (nodeText.includes(normalized)) { return NodeFilter.FILTER_ACCEPT; } return NodeFilter.FILTER_REJECT; @@ -22,6 +23,18 @@ function findMinimumScope(text) { return deepest; } +function normalizeWhitespace(value) { + return String(value || "") + .replace(/\r?\n/g, " ") + .replace(/\s+/g, " ") + .trim() + .toLowerCase(); +} + +function isPlainObject(value) { + return Boolean(value && typeof value === "object" && !Array.isArray(value)); +} + function escapeSelector(value) { if (window.CSS && typeof CSS.escape === "function") { return CSS.escape(value); @@ -29,6 +42,259 @@ function escapeSelector(value) { return String(value).replace(/[^a-zA-Z0-9_-]/g, "\\$&"); } +function buildClassSelector(className) { + const parts = String(className || "") + .trim() + .split(/\s+/) + .filter(Boolean); + if (!parts.length) return ""; + return parts.map((name) => `.${escapeSelector(name)}`).join(""); +} + +function inferCssAllTarget(node) { + if (!node || node.nodeType !== 1) return null; + const classList = node.classList ? Array.from(node.classList) : []; + let best = null; + for (const className of classList) { + if (!className) continue; + const matches = Array.from(document.getElementsByClassName(className)); + const index = matches.indexOf(node); + if (index < 0) continue; + if (!best || matches.length < best.matches.length) { + best = { className, index, matches }; + } + } + if (best) { + return { + kind: "cssAll", + selector: `.${escapeSelector(best.className)}`, + index: best.index + }; + } + const className = + typeof node.className === "string" ? node.className.trim() : ""; + if (!className) return null; + const selector = buildClassSelector(className); + if (!selector) return null; + const matches = Array.from(document.getElementsByClassName(className)); + const index = matches.indexOf(node); + if (index < 0) return null; + return { kind: "cssAll", selector, index }; +} + +function inferCssTarget(node) { + if (!node || node.nodeType !== 1) return null; + const selector = buildSelector(node); + if (!selector) return null; + return { kind: "css", selector }; +} + +function inferAnchoredCssTarget(text) { + const trimmed = String(text || "").trim(); + if (!trimmed) return null; + return { + kind: "anchoredCss", + anchor: { kind: "textScope", text: trimmed }, + selector: ":scope" + }; +} + +function inferScopeTargets(text, node) { + const candidates = []; + const cssAll = inferCssAllTarget(node); + if (cssAll) candidates.push(cssAll); + const css = inferCssTarget(node); + if (css) candidates.push(css); + const anchoredCss = inferAnchoredCssTarget(text); + if (anchoredCss) candidates.push(anchoredCss); + const trimmed = String(text || "").trim(); + if (trimmed) { + candidates.push({ kind: "textScope", text: trimmed }); + } + return candidates; +} + +function selectInferredTarget(text, node) { + const candidates = inferScopeTargets(text, node); + for (const candidate of candidates) { + const resolved = resolveExtractionTarget(candidate); + if (!resolved.error && resolved.node === node) { + return candidate; + } + } + return null; +} + +function findBestScopeCandidate(text) { + const normalized = String(text || "").trim(); + if (!normalized) return null; + const walker = document.createTreeWalker(document.body, NodeFilter.SHOW_ELEMENT, { + acceptNode: (node) => { + if (node.innerText.includes(normalized)) { + return NodeFilter.FILTER_ACCEPT; + } + return NodeFilter.FILTER_REJECT; + } + }); + + let best = null; + let node = walker.nextNode(); + while (node) { + if (node !== document.body) { + const cssAll = inferCssAllTarget(node); + if (cssAll) { + const resolved = resolveExtractionTarget(cssAll); + if (!resolved.error && resolved.node === node) { + const matchCount = document.querySelectorAll(cssAll.selector).length; + if (!best || matchCount < best.matchCount) { + best = { node, target: cssAll, matchCount }; + } + } + } + } + node = walker.nextNode(); + } + return best; +} + +function parseLegacySelectorString(value) { + const trimmed = String(value || "").trim(); + if (!trimmed) { + return { error: "Missing extraction target." }; + } + const classMatch = trimmed.match( + /^(?:document\.)?getElementsByClassName\(\s*(['"])(.+?)\1\s*\)\s*\[\s*(\d+)\s*\]\s*(?:\.innerText\s*)?;?$/i + ); + if (classMatch) { + const selector = buildClassSelector(classMatch[2]); + if (!selector) { + return { error: "Missing extraction target." }; + } + const index = Number.parseInt(classMatch[3], 10); + if (!Number.isInteger(index) || index < 0) { + return { error: "Invalid index." }; + } + return { + target: { kind: "cssAll", selector, index } + }; + } + if (trimmed.includes("getElementsByClassName")) { + return { error: "Unsupported extraction target." }; + } + return null; +} + +function normalizeExtractionTarget(input) { + if (!input) { + return { error: "Missing extraction target." }; + } + if (typeof input === "string") { + const parsed = parseLegacySelectorString(input); + if (parsed) { + if (parsed.error) return { error: parsed.error }; + return { target: parsed.target }; + } + const selector = input.trim(); + if (!selector) { + return { error: "Missing extraction target." }; + } + return { target: { kind: "css", selector } }; + } + if (!isPlainObject(input) || typeof input.kind !== "string") { + return { error: "Missing extraction target." }; + } + return { target: input }; +} + +function resolveExtractionTarget(target) { + if (!target || typeof target !== "object") { + return { error: "Missing extraction target." }; + } + + if (target.kind === "xpath") { + return { error: "XPath not supported." }; + } + + if (target.kind === "textScope") { + if (typeof target.text !== "string" || !target.text.trim()) { + return { error: "Missing extraction target." }; + } + const node = findMinimumScope(target.text); + if (!node) { + return { error: "Scope not found." }; + } + return { node }; + } + + if (target.kind === "anchoredCss") { + const anchor = target.anchor; + if ( + !anchor || + anchor.kind !== "textScope" || + typeof anchor.text !== "string" || + !anchor.text.trim() + ) { + return { error: "Missing extraction target." }; + } + const anchorNode = findMinimumScope(anchor.text); + if (!anchorNode) { + return { error: "Anchor scope not found." }; + } + const selector = target.selector || ""; + if (!selector.trim()) { + return { error: "Missing extraction target." }; + } + let node = null; + try { + node = anchorNode.querySelector(selector); + } catch { + return { error: "Invalid selector." }; + } + if (!node) { + return { error: "Selector matched no elements." }; + } + return { node }; + } + + if (target.kind === "css" || target.kind === "cssAll") { + const selector = target.selector || ""; + if (!selector) { + return { error: "Missing extraction target." }; + } + if (target.kind === "css") { + let node = null; + try { + node = document.querySelector(selector); + } catch { + return { error: "Invalid selector." }; + } + if (!node) { + return { error: "Selector matched no elements." }; + } + return { node }; + } + const index = target.index; + if (!Number.isInteger(index) || index < 0) { + return { error: "Invalid index." }; + } + let nodes = []; + try { + nodes = Array.from(document.querySelectorAll(selector)); + } catch { + return { error: "Invalid selector." }; + } + if (!nodes.length) { + return { error: "Selector matched no elements." }; + } + if (index >= nodes.length) { + return { error: "Index out of bounds." }; + } + return { node: nodes[index] }; + } + + return { error: "Unsupported extraction target." }; +} + function buildSelector(node) { if (!node || node.nodeType !== 1) return "body"; if (node === document.body) return "body"; @@ -308,41 +574,59 @@ observer.observe(document.documentElement, { childList: true, subtree: true }); chrome.runtime.onMessage.addListener((message, _sender, sendResponse) => { if (!message || typeof message !== "object") return; if (message.type === "FIND_SCOPE") { - const node = findMinimumScope(message.text || ""); - if (!node) { - sendResponse({ ok: false, error: "Scope not found." }); + const rawText = message.text || ""; + const baseTarget = { kind: "textScope", text: rawText }; + const resolved = resolveExtractionTarget(baseTarget); + if (resolved.error) { + sendResponse({ ok: false, error: resolved.error }); return; } + let effectiveNode = resolved.node; + let responseTarget = selectInferredTarget(rawText, resolved.node) || baseTarget; + if (resolved.node === document.body) { + const scoped = findBestScopeCandidate(rawText); + if (scoped) { + effectiveNode = scoped.node; + responseTarget = scoped.target; + } else if ( + responseTarget.kind === "css" && + responseTarget.selector === "body" + ) { + responseTarget = baseTarget; + } + } sendResponse({ ok: true, - extracted: node.innerText || "", - selector: buildSelector(node) + extracted: effectiveNode.innerText || "", + target: responseTarget }); return; } if (message.type === "EXTRACT_BY_SELECTOR") { - const selector = message.selector || ""; - if (!selector) { - sendResponse({ ok: false, error: "Missing selector." }); + const { target, error } = normalizeExtractionTarget( + message.target ?? message.selector + ); + if (error) { + sendResponse({ ok: false, error }); return; } - let node = null; - try { - node = document.querySelector(selector); - } catch { - sendResponse({ ok: false, error: "Invalid selector." }); + const resolved = resolveExtractionTarget(target); + if (resolved.error) { + sendResponse({ ok: false, error: resolved.error }); return; } - if (!node) { - sendResponse({ ok: false, error: "Selector not found." }); - return; - } - sendResponse({ ok: true, extracted: node.innerText || "", selector }); + sendResponse({ ok: true, extracted: resolved.node.innerText || "", target }); return; } if (message.type === "EXTRACT_FULL") { - const extracted = document.body?.innerText || ""; - sendResponse({ ok: true, extracted, selector: "body" }); + const target = { kind: "css", selector: "body" }; + const resolved = resolveExtractionTarget(target); + if (resolved.error) { + const extracted = document.body?.innerText || ""; + sendResponse({ ok: true, extracted, target }); + return; + } + sendResponse({ ok: true, extracted: resolved.node.innerText || "", target }); } }); diff --git a/sitecompanion/manifest.json b/sitecompanion/manifest.json index 1f7d7b0..c7f27e9 100644 --- a/sitecompanion/manifest.json +++ b/sitecompanion/manifest.json @@ -1,7 +1,7 @@ { "manifest_version": 3, "name": "SiteCompanion", - "version": "0.4.1", + "version": "0.4.2", "description": "AI companion for site-bound text extraction and tasks.", "permissions": ["storage", "activeTab"], "host_permissions": [""], diff --git a/sitecompanion/popup.css b/sitecompanion/popup.css index 38016f4..e38c3dc 100644 --- a/sitecompanion/popup.css +++ b/sitecompanion/popup.css @@ -275,6 +275,11 @@ button:active { color: var(--accent-deep); } +.helper-text { + font-size: 11px; + color: var(--accent-deep); +} + .output { margin-top: 8px; border: 1px dashed var(--border); diff --git a/sitecompanion/popup.html b/sitecompanion/popup.html index 4502634..374471a 100644 --- a/sitecompanion/popup.html +++ b/sitecompanion/popup.html @@ -18,8 +18,10 @@

This site is not recognized. Paste partial text from the page you want to extract:

+
- + +
diff --git a/sitecompanion/popup.js b/sitecompanion/popup.js index 5f79528..09b6f7d 100644 --- a/sitecompanion/popup.js +++ b/sitecompanion/popup.js @@ -24,6 +24,8 @@ const unknownSiteState = document.getElementById("unknownSiteState"); const extractionReviewState = document.getElementById("extractionReviewState"); const normalExecutionState = document.getElementById("normalExecutionState"); const partialTextPaste = document.getElementById("partialTextPaste"); +const minimalExtractStatus = document.getElementById("minimalExtractStatus"); +const extractMinimalBtn = document.getElementById("extractMinimalBtn"); const extractFullBtn = document.getElementById("extractFullBtn"); const extractedPreview = document.getElementById("extractedPreview"); const siteNameInput = document.getElementById("siteNameInput"); @@ -49,7 +51,7 @@ const state = { currentPopupState: "unknown", globalTheme: "system", forcedTask: null, - siteTextSelector: "", + siteTextTarget: null, selectedTaskId: "", selectedEnvId: "", selectedProfileId: "" @@ -68,6 +70,7 @@ async function switchState(stateName) { } else if (stateName === "normal") { normalExecutionState.classList.remove("hidden"); } + setMinimalStatus(""); await chrome.storage.local.set({ lastPopupState: stateName }); } @@ -77,7 +80,7 @@ function buildPopupDraft() { siteText: state.siteText || "", urlPattern: urlPatternInput?.value?.trim() || "", siteName: siteNameInput?.value?.trim() || "", - siteTextSelector: state.siteTextSelector || "" + siteTextTarget: state.siteTextTarget }; } @@ -101,8 +104,10 @@ function applyPopupDraft(draft) { if (typeof draft.siteName === "string") { siteNameInput.value = draft.siteName; } - if (typeof draft.siteTextSelector === "string") { - state.siteTextSelector = draft.siteTextSelector; + if (draft.siteTextTarget) { + state.siteTextTarget = draft.siteTextTarget; + } else if (typeof draft.siteTextSelector === "string") { + state.siteTextTarget = { kind: "css", selector: draft.siteTextSelector }; } } @@ -127,6 +132,45 @@ function normalizeName(value) { return (value || "").trim().toLowerCase(); } +function escapeSelector(value) { + if (window.CSS && typeof CSS.escape === "function") { + return CSS.escape(value); + } + return String(value).replace(/[^a-zA-Z0-9_-]/g, "\\$&"); +} + +function buildClassSelector(className) { + const parts = String(className || "") + .trim() + .split(/\s+/) + .filter(Boolean); + if (!parts.length) return ""; + return parts.map((name) => `.${escapeSelector(name)}`).join(""); +} + +function parseLegacyDomSelectorString(rawValue) { + const trimmed = String(rawValue || "").trim(); + if (!trimmed) return null; + const classMatch = trimmed.match( + /^(?:document\.)?getElementsByClassName\(\s*(['"])(.+?)\1\s*\)\s*\[\s*(\d+)\s*\]\s*(?:\.innerText\s*)?;?$/i + ); + if (classMatch) { + const selector = buildClassSelector(classMatch[2]); + if (!selector) { + return { target: null, error: "Missing extraction target." }; + } + const index = Number.parseInt(classMatch[3], 10); + if (!Number.isInteger(index) || index < 0) { + return { target: null, error: "Invalid index." }; + } + return { target: { kind: "cssAll", selector, index }, error: null }; + } + if (trimmed.includes("getElementsByClassName")) { + return { target: null, error: "Unsupported extraction target." }; + } + return null; +} + function normalizeConfigList(list) { return Array.isArray(list) ? list.map((item) => ({ ...item, enabled: item.enabled !== false })) @@ -181,6 +225,26 @@ function resolveEffectiveList(globalItems, workspace, site, listKey, disabledKey return resolveScopedItems(workspaceEffective, siteItems, siteDisabled); } +function normalizeStoredExtractTarget(site) { + if (!site || typeof site !== "object") return null; + const direct = site.extractTarget; + if (direct && typeof direct === "object" && typeof direct.kind === "string") { + return direct; + } + if (typeof direct === "string" && direct.trim()) { + const legacy = parseLegacyDomSelectorString(direct); + if (legacy?.target) return legacy.target; + return { kind: "css", selector: direct.trim() }; + } + const legacy = site.extractSelector; + if (typeof legacy === "string" && legacy.trim()) { + const parsedLegacy = parseLegacyDomSelectorString(legacy); + if (parsedLegacy?.target) return parsedLegacy.target; + return { kind: "css", selector: legacy.trim() }; + } + return null; +} + function filterApiConfigsForScope(apiConfigs, workspace, site) { const workspaceDisabled = workspace?.disabledInherited?.apiConfigs || []; const siteDisabled = site?.disabledInherited?.apiConfigs || []; @@ -197,7 +261,7 @@ async function detectSite(url) { const { sites = [], workspaces = [] } = await getStorage(["sites", "workspaces"]); const normalizedSites = (Array.isArray(sites) ? sites : []).map((site) => ({ ...site, - extractSelector: site?.extractSelector || "body" + extractTarget: normalizeStoredExtractTarget(site) })); state.sites = normalizedSites; state.workspaces = workspaces; @@ -433,6 +497,12 @@ function setStatus(message) { statusEl.textContent = message; } +function setMinimalStatus(message) { + if (!minimalExtractStatus) return; + minimalExtractStatus.textContent = message || ""; + minimalExtractStatus.classList.toggle("hidden", !message); +} + function applyTheme(theme) { const value = theme || "system"; document.documentElement.dataset.theme = value; @@ -704,17 +774,24 @@ async function loadConfig() { const envs = normalizeConfigList(stored.envConfigs); const profiles = normalizeConfigList(stored.profiles); const shortcuts = normalizeConfigList(stored.shortcuts); + let needsSiteUpdate = false; const sites = Array.isArray(stored.sites) - ? stored.sites.map((site) => ({ - ...site, - extractSelector: site?.extractSelector || "body" - })) + ? stored.sites.map((site) => { + const target = normalizeStoredExtractTarget(site); + if (site?.extractSelector || typeof site?.extractTarget === "string") { + needsSiteUpdate = true; + } + return { ...site, extractTarget: target }; + }) : state.sites; const workspaces = Array.isArray(stored.workspaces) ? stored.workspaces : state.workspaces; state.sites = sites; state.workspaces = workspaces; + if (needsSiteUpdate) { + await chrome.storage.local.set({ sites }); + } const activeSite = state.currentSite ? sites.find((entry) => entry.id === state.currentSite.id) @@ -812,10 +889,14 @@ async function loadTheme() { async function handleExtract() { setStatus("Extracting..."); try { - const selector = state.currentSite?.extractSelector || "body"; + const target = normalizeStoredExtractTarget(state.currentSite); + if (!target) { + setStatus("Missing extraction target."); + return false; + } const response = await sendToActiveTab({ type: "EXTRACT_BY_SELECTOR", - selector + target }); if (!response?.ok) { setStatus(response?.error || "No text detected."); @@ -823,7 +904,7 @@ async function handleExtract() { } state.siteText = response.extracted || ""; - state.siteTextSelector = response.selector || selector; + state.siteTextTarget = response.target || target; updateSiteTextCount(); updatePromptCount(0); setStatus("Text extracted."); @@ -1045,45 +1126,70 @@ async function fillSiteDefaultsFromTab() { const tabs = await chrome.tabs.query({ active: true, currentWindow: true }); if (!tabs[0]?.url) return; const url = new URL(tabs[0].url); - urlPatternInput.value = url.hostname + url.pathname + "*"; + urlPatternInput.value = `${url.hostname}/*`; if (!siteNameInput.value.trim()) { siteNameInput.value = url.hostname; } } -partialTextPaste.addEventListener("input", async () => { - const text = partialTextPaste.value.trim(); - if (text.length < 5) return; +async function runMinimalExtraction(text, minLength = 5) { + const trimmed = (text || "").trim(); + if (trimmed.length < minLength) { + setMinimalStatus("Paste more text to extract."); + return false; + } setStatus("Finding scope..."); try { - const response = await sendToActiveTab({ type: "FIND_SCOPE", text }); + const response = await sendToActiveTab({ type: "FIND_SCOPE", text: trimmed }); if (response?.ok) { state.siteText = response.extracted; - state.siteTextSelector = response.selector || ""; + state.siteTextTarget = response.target || { kind: "textScope", text: trimmed }; extractedPreview.textContent = state.siteText; await fillSiteDefaultsFromTab(); switchState("review"); await persistPopupDraft(); + setMinimalStatus(""); setStatus("Review extraction."); + return true; } + setMinimalStatus(response?.error || "Text could not be matched."); + return false; } catch (error) { - setStatus("Error finding scope."); + setMinimalStatus(error?.message || "Error finding scope."); + return false; + } +} + +partialTextPaste.addEventListener("input", () => { + if (state.currentPopupState === "unknown") { + void persistPopupDraft(); + setMinimalStatus(""); } }); +extractMinimalBtn?.addEventListener("click", async () => { + await runMinimalExtraction(partialTextPaste.value, 1); +}); + extractFullBtn.addEventListener("click", async () => { + setMinimalStatus(""); setStatus("Extracting full text..."); try { - const response = await sendToActiveTab({ type: "EXTRACT_FULL" }); + const response = await sendToActiveTab({ + type: "EXTRACT_FULL" + }); if (response?.ok) { + const target = response.target || { kind: "css", selector: "body" }; state.siteText = response.extracted; - state.siteTextSelector = response.selector || "body"; + state.siteTextTarget = target; extractedPreview.textContent = state.siteText; await fillSiteDefaultsFromTab(); switchState("review"); await persistPopupDraft(); setStatus("Review extraction."); + } else { + setStatus(response?.error || "Error extracting text."); } } catch (error) { setStatus("Error extracting text."); @@ -1107,7 +1213,8 @@ retryExtractBtn.addEventListener("click", () => { urlPatternInput.value = ""; siteNameInput.value = ""; state.siteText = ""; - state.siteTextSelector = ""; + state.siteTextTarget = null; + setMinimalStatus(""); void clearPopupDraft(); setStatus("Ready."); }); @@ -1123,6 +1230,10 @@ confirmSiteBtn.addEventListener("click", async () => { setStatus("Enter a URL pattern."); return; } + if (!state.siteTextTarget) { + setStatus("Missing extraction target."); + return; + } // AGENTS.md: No URL pattern may be a substring of another. const conflict = state.sites.find(s => s.urlPattern.includes(pattern) || pattern.includes(s.urlPattern)); @@ -1136,7 +1247,7 @@ confirmSiteBtn.addEventListener("click", async () => { name, urlPattern: pattern, workspaceId: "global", // Default to global for now - extractSelector: state.siteTextSelector || "body" + extractTarget: state.siteTextTarget }; state.sites.push(newSite); diff --git a/sitecompanion/settings.js b/sitecompanion/settings.js index 154b7d9..1c31eef 100644 --- a/sitecompanion/settings.js +++ b/sitecompanion/settings.js @@ -29,11 +29,205 @@ const OPENAI_DEFAULTS = { apiKeyHeader: "Authorization", apiKeyPrefix: "Bearer " }; -const DEFAULT_MODEL = "gpt-4o-mini"; -const DEFAULT_SYSTEM_PROMPT = - "You are a precise, honest assistant. Be concise and avoid inventing details, be critical about evaluations. You should put in a small summary of all the sections at the end. You should answer in no longer than 3 sections including the summary. And remember to bold or italicize key points."; +const DEFAULT_MODEL = "gpt-5.2"; +const DEFAULT_SYSTEM_PROMPT = ""; const SIDEBAR_WIDTH_KEY = "sidebarWidth"; +function isPlainObject(value) { + return Boolean(value && typeof value === "object" && !Array.isArray(value)); +} + +function escapeSelector(value) { + if (window.CSS && typeof CSS.escape === "function") { + return CSS.escape(value); + } + return String(value).replace(/[^a-zA-Z0-9_-]/g, "\\$&"); +} + +function buildClassSelector(className) { + const parts = String(className || "") + .trim() + .split(/\s+/) + .filter(Boolean); + if (!parts.length) return ""; + return parts.map((name) => `.${escapeSelector(name)}`).join(""); +} + +function parseLegacyDomSelectorString(rawValue) { + const trimmed = String(rawValue || "").trim(); + if (!trimmed) return null; + const classMatch = trimmed.match( + /^(?:document\.)?getElementsByClassName\(\s*(['"])(.+?)\1\s*\)\s*\[\s*(\d+)\s*\]\s*(?:\.innerText\s*)?;?$/i + ); + if (classMatch) { + const selector = buildClassSelector(classMatch[2]); + if (!selector) { + return { target: null, error: "Missing extraction target." }; + } + const index = Number.parseInt(classMatch[3], 10); + if (!Number.isInteger(index) || index < 0) { + return { target: null, error: "Invalid index." }; + } + return { target: { kind: "cssAll", selector, index }, error: null }; + } + if (trimmed.includes("getElementsByClassName")) { + return { target: null, error: "Unsupported extraction target." }; + } + return null; +} + +function parseLooseJsonInput(rawValue) { + const trimmed = String(rawValue || "").trim(); + if (!trimmed.startsWith("{")) return null; + let normalized = trimmed; + normalized = normalized.replace( + /([{,]\s*)([A-Za-z_][A-Za-z0-9_]*)(\s*:)/g, + '$1"$2"$3' + ); + normalized = normalized.replace( + /'([^'\\]*(?:\\.[^'\\]*)*)'/g, + (_match, value) => `"${value.replace(/"/g, '\\"')}"` + ); + return normalized; +} + +function normalizeExtractionTargetValue(value) { + if (typeof value === "string") { + const legacy = parseLegacyDomSelectorString(value); + if (legacy) { + return legacy.target; + } + const trimmed = value.trim(); + return trimmed ? { kind: "css", selector: trimmed } : null; + } + if (isPlainObject(value) && typeof value.kind === "string") { + return value; + } + return null; +} + +function serializeExtractionTarget(target) { + if (!target) return ""; + if (typeof target === "string") { + const legacy = parseLegacyDomSelectorString(target); + if (legacy?.target) return JSON.stringify(legacy.target); + const trimmed = target.trim(); + if (!trimmed) return ""; + return JSON.stringify({ kind: "css", selector: trimmed }); + } + if (isPlainObject(target) && typeof target.kind === "string") { + return JSON.stringify(target); + } + return ""; +} + +function validateExtractionTarget(target) { + if (!target || typeof target !== "object") { + return "Missing extraction target."; + } + if (target.kind === "xpath") { + return "XPath not supported."; + } + if (target.kind === "css") { + return typeof target.selector === "string" && target.selector.trim() + ? null + : "Missing extraction target."; + } + if (target.kind === "cssAll") { + if (typeof target.selector !== "string" || !target.selector.trim()) { + return "Missing extraction target."; + } + if (!Number.isInteger(target.index) || target.index < 0) { + return "Invalid index."; + } + return null; + } + if (target.kind === "textScope") { + return typeof target.text === "string" && target.text.trim() + ? null + : "Missing extraction target."; + } + if (target.kind === "anchoredCss") { + const anchor = target.anchor; + if (!anchor || anchor.kind !== "textScope") { + return "Invalid anchor target."; + } + if (typeof anchor.text !== "string" || !anchor.text.trim()) { + return "Missing extraction target."; + } + if (typeof target.selector !== "string" || !target.selector.trim()) { + return "Missing extraction target."; + } + return null; + } + return "Unsupported extraction target."; +} + +function parseExtractionTargetInput(rawValue) { + const trimmed = (rawValue || "").trim(); + if (!trimmed) { + return { target: null, error: "Missing extraction target." }; + } + const legacy = parseLegacyDomSelectorString(trimmed); + if (legacy) { + if (legacy.error) { + return { target: null, error: legacy.error }; + } + const error = validateExtractionTarget(legacy.target); + return { target: legacy.target, error }; + } + if (trimmed.startsWith("textScope:")) { + const text = trimmed.slice("textScope:".length).trim(); + const target = { kind: "textScope", text }; + const error = validateExtractionTarget(target); + return { target, error }; + } + let target = null; + if (trimmed.startsWith("{")) { + try { + const parsed = JSON.parse(trimmed); + target = normalizeExtractionTargetValue(parsed); + } catch { + const normalized = parseLooseJsonInput(trimmed); + if (!normalized) { + return { target: null, error: "Invalid extraction target JSON." }; + } + try { + const parsed = JSON.parse(normalized); + target = normalizeExtractionTargetValue(parsed); + } catch { + return { target: null, error: "Invalid extraction target JSON." }; + } + } + } else { + target = { kind: "css", selector: trimmed }; + } + if (!target) { + return { target: null, error: "Invalid extraction target." }; + } + const error = validateExtractionTarget(target); + return { target, error }; +} + +function normalizeStoredExtractionTarget(site) { + const normalized = normalizeExtractionTargetValue(site?.extractTarget); + if (normalized) { + const changed = typeof site?.extractTarget === "string"; + return { target: normalized, changed }; + } + if (typeof site?.extractSelector === "string" && site.extractSelector.trim()) { + const legacy = parseLegacyDomSelectorString(site.extractSelector); + if (legacy?.target) { + return { target: legacy.target, changed: true }; + } + return { + target: { kind: "css", selector: site.extractSelector.trim() }, + changed: true + }; + } + return { target: null, changed: false }; +} + function getSidebarWidthLimits() { const min = 160; const max = Math.max(min, Math.min(360, window.innerWidth - 240)); @@ -2481,6 +2675,7 @@ function collectSites() { const patternInput = card.querySelector(".site-pattern"); const workspaceSelect = card.querySelector(".site-workspace"); const extractInput = card.querySelector(".site-extract-selector"); + const parsedTarget = parseExtractionTargetInput(extractInput?.value || ""); const themeSelect = card.querySelector(".appearance-theme"); const toolbarSelect = card.querySelector(".appearance-toolbar-position"); const envsContainer = card.querySelector(".site-envs"); @@ -2497,7 +2692,7 @@ function collectSites() { name: (nameInput?.value || "").trim(), urlPattern: (patternInput?.value || "").trim(), workspaceId: workspaceSelect?.value || "global", - extractSelector: (extractInput?.value || "").trim(), + extractTarget: parsedTarget.target, theme: themeSelect?.value || "inherit", toolbarPosition: toolbarSelect?.value || "inherit", envConfigs: envsContainer ? collectEnvConfigs(envsContainer) : [], @@ -2619,7 +2814,7 @@ function buildSiteCard(site, allWorkspaces = []) { extractLabel.textContent = "Site Text Selector"; const extractInput = document.createElement("input"); extractInput.type = "text"; - extractInput.value = site.extractSelector || ""; + extractInput.value = serializeExtractionTarget(site.extractTarget); extractInput.className = "site-extract-selector"; extractInput.placeholder = "body"; extractInput.addEventListener("input", () => { @@ -3203,6 +3398,11 @@ function updateSidebarErrors() { ".shortcut-name", `${label} shortcuts` ); + const extractInput = card.querySelector(".site-extract-selector"); + const { error } = parseExtractionTargetInput(extractInput?.value || ""); + if (error) { + errors.push(`${label} site text selector: ${error}`); + } }); checkNameInputs(sitesContainer, ".site-name", "Sites"); @@ -3408,13 +3608,18 @@ async function loadSettings() { } if (Array.isArray(sites)) { + let needsSiteUpdate = false; sites = sites.map((site) => { if (!site || typeof site !== "object") return site; + const normalizedTarget = normalizeStoredExtractionTarget(site); + if (normalizedTarget.changed) { + needsSiteUpdate = true; + } return { ...site, name: site.name || site.urlPattern || "", workspaceId: site.workspaceId || "global", - extractSelector: typeof site.extractSelector === "string" ? site.extractSelector : "", + extractTarget: normalizedTarget.target, theme: site.theme || "inherit", toolbarPosition: site.toolbarPosition || "inherit", envConfigs: normalizeConfigList(site.envConfigs), @@ -3424,6 +3629,9 @@ async function loadSettings() { disabledInherited: normalizeDisabledInherited(site.disabledInherited) }; }); + if (needsSiteUpdate) { + await chrome.storage.local.set({ sites }); + } } // Load basic resources first so they are available for shortcuts/workspaces