Skip to content
Open
Show file tree
Hide file tree
Changes from 8 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions backend/prompt_studio/prompt_studio_core_v2/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,8 @@ class ToolStudioPromptKeys:
EXECUTION_SOURCE = "execution_source"
LINE_ITEM = "line-item"
CUSTOM_DATA = "custom_data"
SIGNATURE_METADATA = "signature_metadata"
SIGNATURE_PAGE_REFERENCES = "signature_page_references"
# Webhook postprocessing settings
ENABLE_POSTPROCESSING_WEBHOOK = "enable_postprocessing_webhook"
POSTPROCESSING_WEBHOOK_URL = "postprocessing_webhook_url"
Expand Down
6 changes: 4 additions & 2 deletions backend/prompt_studio/prompt_studio_core_v2/internal_views.py
Original file line number Diff line number Diff line change
Expand Up @@ -244,11 +244,13 @@ def indexing_status(request):
user_id = data.get("user_id", "")
doc_id_key = data.get("doc_id_key", "")

if not action or not org_id or not user_id or not doc_id_key:
# user_id may be empty (e.g. mock auth users) - it's only used as a
# Redis cache key fragment, so empty is acceptable.
if not action or not org_id or not doc_id_key:
Comment thread
coderabbitai[bot] marked this conversation as resolved.
return JsonResponse(
{
"success": False,
"error": "action, org_id, user_id, doc_id_key are required",
"error": "action, org_id, doc_id_key are required",
},
status=status.HTTP_400_BAD_REQUEST,
)
Expand Down
146 changes: 136 additions & 10 deletions backend/prompt_studio/prompt_studio_core_v2/prompt_studio_helper.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,22 @@
import time
import uuid
from pathlib import Path
from typing import Any
from typing import Any, NamedTuple


class ExtractResult(NamedTuple):
"""Return value of ``PromptStudioHelper.dynamic_extractor``.

``signature_metadata`` and ``signature_page_references`` are populated
only when the x2text adapter is LLMWhisperer V2 in ``document_insights``
mode and the document contains signatures. They are read either from
the live extract dispatch result (cache miss) or from the on-disk
``.doc_insights.json`` sidecar (cache hit).
"""

text: str
signature_metadata: dict[str, Any] | None = None
signature_page_references: dict[str, Any] | None = None

from account_v2.constants import Common
from account_v2.models import User
Expand Down Expand Up @@ -672,7 +687,7 @@
]

@staticmethod
def build_fetch_response_payload(

Check failure on line 690 in backend/prompt_studio/prompt_studio_core_v2/prompt_studio_helper.py

View check run for this annotation

SonarQubeCloud / SonarCloud Code Analysis

Refactor this function to reduce its Cognitive Complexity from 16 to the 15 allowed.

See more on https://sonarcloud.io/project/issues?id=Zipstack_unstract&issues=AZ4mWP2m2iU0xy3YPLpF&open=AZ4mWP2m2iU0xy3YPLpF&pullRequest=1967
tool: CustomTool,
doc_path: str,
doc_name: str,
Expand Down Expand Up @@ -734,14 +749,15 @@
)

# Extract (blocking, usually cached)
extracted_text = PromptStudioHelper.dynamic_extractor(
extract_result = PromptStudioHelper.dynamic_extractor(
profile_manager=profile_manager,
file_path=file_path,
org_id=org_id,
document_id=document_id,
run_id=run_id,
enable_highlight=tool.enable_highlight,
)
extracted_text = extract_result.text

is_summary = tool.summarize_as_source
if is_summary:
Expand Down Expand Up @@ -836,6 +852,14 @@
tool_settings[TSPKeys.WORD_CONFIDENCE_POSTAMBLE] = getattr(
settings, TSPKeys.WORD_CONFIDENCE_POSTAMBLE.upper(), ""
)
if extract_result.signature_metadata:
tool_settings[TSPKeys.SIGNATURE_METADATA] = (
extract_result.signature_metadata
)
if extract_result.signature_page_references:
tool_settings[TSPKeys.SIGNATURE_PAGE_REFERENCES] = (
extract_result.signature_page_references
)

file_hash = fs_instance.get_hash_from_file(path=extract_path)

Expand Down Expand Up @@ -951,14 +975,15 @@
)

# Extract ONCE (blocking, usually cached)
extracted_text = PromptStudioHelper.dynamic_extractor(
extract_result = PromptStudioHelper.dynamic_extractor(
profile_manager=profile_manager,
file_path=file_path,
org_id=org_id,
document_id=document_id,
run_id=run_id,
enable_highlight=tool.enable_highlight,
)
extracted_text = extract_result.text

is_summary = tool.summarize_as_source
if is_summary:
Expand Down Expand Up @@ -1026,6 +1051,14 @@
tool_settings[TSPKeys.WORD_CONFIDENCE_POSTAMBLE] = getattr(
settings, TSPKeys.WORD_CONFIDENCE_POSTAMBLE.upper(), ""
)
if extract_result.signature_metadata:
tool_settings[TSPKeys.SIGNATURE_METADATA] = (
extract_result.signature_metadata
)
if extract_result.signature_page_references:
tool_settings[TSPKeys.SIGNATURE_PAGE_REFERENCES] = (
extract_result.signature_page_references
)

file_hash = fs_instance.get_hash_from_file(path=extract_path)

Expand Down Expand Up @@ -1077,7 +1110,7 @@
return context, cb_kwargs

@staticmethod
def build_single_pass_payload(

Check failure on line 1113 in backend/prompt_studio/prompt_studio_core_v2/prompt_studio_helper.py

View check run for this annotation

SonarQubeCloud / SonarCloud Code Analysis

Refactor this function to reduce its Cognitive Complexity from 16 to the 15 allowed.

See more on https://sonarcloud.io/project/issues?id=Zipstack_unstract&issues=AZ4mWP2m2iU0xy3YPLpG&open=AZ4mWP2m2iU0xy3YPLpG&pullRequest=1967
tool: CustomTool,
doc_path: str,
doc_name: str,
Expand Down Expand Up @@ -1126,7 +1159,7 @@
)

# Extract (blocking, usually cached)
PromptStudioHelper.dynamic_extractor(
extract_result = PromptStudioHelper.dynamic_extractor(
profile_manager=default_profile,
file_path=doc_path,
org_id=org_id,
Expand Down Expand Up @@ -1165,6 +1198,14 @@
or TSPKeys.SIMPLE,
TSPKeys.SIMILARITY_TOP_K: default_profile.similarity_top_k,
}
if extract_result.signature_metadata:
tool_settings[TSPKeys.SIGNATURE_METADATA] = (
extract_result.signature_metadata
)
if extract_result.signature_page_references:
tool_settings[TSPKeys.SIGNATURE_PAGE_REFERENCES] = (
extract_result.signature_page_references
)

for p in prompts:
if not p.prompt:
Expand Down Expand Up @@ -1366,14 +1407,15 @@
tool=util,
)

extracted_text = PromptStudioHelper.dynamic_extractor(
extract_result = PromptStudioHelper.dynamic_extractor(
profile_manager=default_profile,
file_path=file_path,
org_id=org_id,
document_id=document_id,
run_id=run_id,
enable_highlight=tool.enable_highlight,
)
extracted_text = extract_result.text
if tool.summarize_context:
summarize_file_path = PromptStudioHelper.summarize(
file_name, org_id, run_id, tool
Expand Down Expand Up @@ -1817,14 +1859,15 @@
tool=util,
)
logger.info(f"Extracting text from {file_path} for {doc_id}")
extracted_text = PromptStudioHelper.dynamic_extractor(
extract_result = PromptStudioHelper.dynamic_extractor(
profile_manager=profile_manager,
file_path=file_path,
org_id=org_id,
document_id=document_id,
run_id=run_id,
enable_highlight=tool.enable_highlight,
)
extracted_text = extract_result.text
logger.info(f"Extracted text from {file_path} for {doc_id}")
if is_summary:
profile_manager.chunk_size = 0
Expand Down Expand Up @@ -1933,6 +1976,14 @@
tool_settings[TSPKeys.WORD_CONFIDENCE_POSTAMBLE] = getattr(
settings, TSPKeys.WORD_CONFIDENCE_POSTAMBLE.upper(), ""
)
if extract_result.signature_metadata:
tool_settings[TSPKeys.SIGNATURE_METADATA] = (
extract_result.signature_metadata
)
if extract_result.signature_page_references:
tool_settings[TSPKeys.SIGNATURE_PAGE_REFERENCES] = (
extract_result.signature_page_references
)
file_hash = fs_instance.get_hash_from_file(path=doc_path)

payload = {
Expand Down Expand Up @@ -2194,7 +2245,7 @@
file_path = os.path.join(
directory, "extract", os.path.splitext(filename)[0] + ".txt"
)
PromptStudioHelper.dynamic_extractor(
extract_result = PromptStudioHelper.dynamic_extractor(
profile_manager=default_profile,
file_path=input_file_path,
org_id=org_id,
Expand Down Expand Up @@ -2232,6 +2283,14 @@
default_profile.retrieval_strategy or TSPKeys.SIMPLE
)
tool_settings[TSPKeys.SIMILARITY_TOP_K] = default_profile.similarity_top_k
if extract_result.signature_metadata:
tool_settings[TSPKeys.SIGNATURE_METADATA] = (
extract_result.signature_metadata
)
if extract_result.signature_page_references:
tool_settings[TSPKeys.SIGNATURE_PAGE_REFERENCES] = (
extract_result.signature_page_references
)
for prompt in prompts:
if not prompt.prompt:
raise EmptyPromptError()
Expand Down Expand Up @@ -2291,15 +2350,58 @@
except CustomTool.DoesNotExist:
return None

@staticmethod
def _signature_sidecar_path(extract_file_path: str) -> str:
    """Return the sidecar path that sits next to an extracted-text file.

    The final suffix of ``extract_file_path`` (if any) is dropped and
    ``.doc_insights.json`` is appended to what remains, so ``doc.txt``
    maps to ``doc.doc_insights.json``.
    """
    # Strip only the last extension; earlier dots in the name are kept.
    stem_path = Path(extract_file_path).with_suffix("")
    return f"{stem_path}.doc_insights.json"

@staticmethod
def _load_signature_sidecar(
    extract_file_path: str,
    fs_instance: Any,
) -> tuple[dict[str, Any] | None, dict[str, Any] | None]:
    """Return ``(signature_metadata, signature_page_references)`` from the
    sidecar, or ``(None, None)`` if the sidecar is missing, unreadable,
    not valid JSON, or not a JSON object.

    Signature data is only written by the executor when a document
    contains signatures in document_insights mode; cache-hit calls
    for documents extracted in other modes legitimately have no
    sidecar, so absence is not an error.

    Args:
        extract_file_path: Path of the cached extracted-text file; the
            sidecar path is derived from it.
        fs_instance: Storage object exposing ``read(path=..., mode=...)``.

    Returns:
        A 2-tuple of the ``signature_metadata`` and
        ``signature_page_references`` values. Each value is ``None`` when
        the key is absent or its value is empty/falsy.
    """
    sidecar_path = PromptStudioHelper._signature_sidecar_path(extract_file_path)
    try:
        raw = fs_instance.read(path=sidecar_path, mode="r")
    except FileNotFoundError:
        # Expected for documents without signature data; stay silent.
        return None, None
    except Exception as e:
        # Best effort: a broken sidecar must not fail the extraction path.
        logger.warning(
            "DOC_INSIGHTS sidecar: failed to read %s: %s",
            sidecar_path,
            e,
        )
        return None, None
    try:
        data = json.loads(raw)
    except (TypeError, ValueError) as e:
        logger.warning(
            "DOC_INSIGHTS sidecar: failed to parse %s: %s",
            sidecar_path,
            e,
        )
        return None, None
    # json.loads can legitimately return a list, string, number or None.
    # Calling .get() on those would raise AttributeError, so treat any
    # non-object payload as an unreadable sidecar.
    if not isinstance(data, dict):
        logger.warning(
            "DOC_INSIGHTS sidecar: expected a JSON object in %s, got %s",
            sidecar_path,
            type(data).__name__,
        )
        return None, None
    # Normalise empty containers / falsy values to None.
    sig_meta = data.get("signature_metadata") or None
    sig_refs = data.get("signature_page_references") or None
    return sig_meta, sig_refs
Comment thread
coderabbitai[bot] marked this conversation as resolved.

@staticmethod
def dynamic_extractor(

Check failure on line 2397 in backend/prompt_studio/prompt_studio_core_v2/prompt_studio_helper.py

View check run for this annotation

SonarQubeCloud / SonarCloud Code Analysis

Refactor this function to reduce its Cognitive Complexity from 16 to the 15 allowed.

See more on https://sonarcloud.io/project/issues?id=Zipstack_unstract&issues=AZ4mWP2m2iU0xy3YPLpH&open=AZ4mWP2m2iU0xy3YPLpH&pullRequest=1967
file_path: str,
enable_highlight: bool,
run_id: str,
org_id: str,
profile_manager: ProfileManager,
document_id: str,
) -> str:
) -> ExtractResult:
# Guard against None metadata (when adapter_metadata_b is None)
metadata = profile_manager.x2text.metadata or {}
x2text_config_hash = ToolUtils.hash_str(json.dumps(metadata, sort_keys=True))
Expand Down Expand Up @@ -2329,7 +2431,15 @@
try:
extracted_text = fs_instance.read(path=extract_file_path, mode="r")
logger.info("Extracted text found. Reading from file..")
return extracted_text
sig_meta, sig_refs = PromptStudioHelper._load_signature_sidecar(
extract_file_path=extract_file_path,
fs_instance=fs_instance,
)
return ExtractResult(
text=extracted_text,
signature_metadata=sig_meta,
signature_page_references=sig_refs,
)
except FileNotFoundError as e:
logger.warning(
f"File not found for extraction. {extract_file_path}. {e}"
Expand Down Expand Up @@ -2383,6 +2493,18 @@
)

extracted_text = result.data.get("extracted_text", "")
signature_metadata = result.data.get("signature_metadata")
signature_page_references = result.data.get("signature_page_references")
if signature_metadata or signature_page_references:
logger.info(
"DOC_INSIGHTS dynamic_extractor: captured signature data "
"(pages=%s, refs=%s) for document %s",
list(signature_metadata.keys()) if signature_metadata else [],
list(signature_page_references.keys())
if signature_page_references
else [],
document_id,
)
success = PromptStudioIndexHelper.mark_extraction_status(
document_id=document_id,
profile_manager=profile_manager,
Expand All @@ -2395,7 +2517,11 @@
f"Extraction completed but status not saved."
)

return extracted_text
return ExtractResult(
text=extracted_text,
signature_metadata=signature_metadata,
signature_page_references=signature_page_references,
)

@staticmethod
def export_project_settings(tool: CustomTool) -> dict:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -415,14 +415,22 @@ const TextResult = ({

const confidence = getConfidenceForText();

return enableHighlight ? (
// Make the answer clickable when the tool has highlighting enabled OR
// when the backend produced highlight_data (e.g. signature page refs
// from LLMWhisperer's document_insights mode), so signature highlights
// still work without requiring the separate enable_highlight toggle.
const hasHighlightData =
Array.isArray(highlightData) && highlightData.length > 0;
const isClickable = enableHighlight || hasHighlightData;

return isClickable ? (
<Typography.Text
wrap
onClick={() =>
onSelectHighlight(highlightData, promptId, profileId, confidence)
}
className={`prompt-output-result json-value ${
highlightData ? "clickable" : ""
hasHighlightData ? "clickable" : ""
} ${selectedHighlight?.highlightedPrompt === promptId ? "selected" : ""}`}
>
{parsedOutput}
Expand Down
41 changes: 25 additions & 16 deletions frontend/src/components/custom-tools/prompt-card/PromptCard.jsx
Original file line number Diff line number Diff line change
Expand Up @@ -240,23 +240,32 @@ const PromptCard = memo(
highlightedProfile,
confidenceData,
) => {
if (details?.enable_highlight) {
const processedHighlight =
singlePassExtractMode &&
typeof highlightData === "object" &&
!Array.isArray(highlightData)
? flattenHighlightData(highlightData)
: highlightData;

updateCustomTool({
selectedHighlight: {
highlight: processedHighlight,
highlightedPrompt: highlightedPrompt,
highlightedProfile: highlightedProfile,
confidence: confidenceData,
},
});
// Allow highlight state to update when the tool has highlighting
// enabled OR when the backend produced highlight_data (e.g.
// signature page refs from LLMWhisperer's document_insights mode),
// so signature-driven page jumps work without the separate
// enable_highlight toggle.
const hasHighlightData = Array.isArray(highlightData)
? highlightData.length > 0
: Boolean(highlightData);
Comment thread
greptile-apps[bot] marked this conversation as resolved.
if (!details?.enable_highlight && !hasHighlightData) {
return;
}
const processedHighlight =
singlePassExtractMode &&
typeof highlightData === "object" &&
!Array.isArray(highlightData)
? flattenHighlightData(highlightData)
: highlightData;

updateCustomTool({
selectedHighlight: {
highlight: processedHighlight,
highlightedPrompt: highlightedPrompt,
highlightedProfile: highlightedProfile,
confidence: confidenceData,
},
});
};

const handleTypeChange = (value) => {
Expand Down
Loading