-
-
Notifications
You must be signed in to change notification settings - Fork 1.6k
feat: add Jira ticket-context support #2420
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from 5 commits
541c220
3d539bb
cb9ee43
f3a0335
1bc29cd
8d6bac2
9a39046
926e33f
c78459f
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,6 +1,8 @@ | ||
| import re | ||
| import traceback | ||
|
|
||
| from atlassian import Jira | ||
|
|
||
| from pr_agent.config_loader import get_settings | ||
| from pr_agent.git_providers import GithubProvider | ||
| from pr_agent.git_providers import AzureDevopsProvider | ||
|
|
@@ -13,26 +15,189 @@ | |
| # Option A: issue number at start of branch or after /, followed by - or end (e.g. feature/1-test-issue, 123-fix) | ||
| BRANCH_ISSUE_PATTERN = re.compile(r"(?:^|/)(\d{1,6})(?=-|$)") | ||
|
|
||
| # Max number of tickets to analyse per PR, and max characters of ticket body to keep. | ||
| MAX_TICKETS = 3 | ||
| MAX_TICKET_CHARACTERS = 10000 | ||
|
|
||
| # Jira REST API version. Pinned to "2" because extract_jira_tickets() reads description | ||
| # and custom fields as plain strings. v2 returns them that way; v3 returns ADF JSON dicts | ||
| # that would need separate parsing. The atlassian-python-api client defaults to "2" today, | ||
| # but pinning keeps the contract stable across dependency upgrades. | ||
| JIRA_API_VERSION = "2" | ||
|
|
||
| def find_jira_tickets(text): | ||
| # Regular expression patterns for JIRA tickets | ||
| # Regular expression patterns for JIRA tickets. Matching is case-insensitive so | ||
| # lowercased branch names (e.g. bugfix/abc-123-description) are detected; keys are | ||
| # normalized to upper case to match Jira's canonical form. | ||
| patterns = [ | ||
| r'\b[A-Z]{2,10}-\d{1,7}\b', # Standard JIRA ticket format (e.g., PROJ-123) | ||
| r'(?:https?://[^\s/]+/browse/)?([A-Z]{2,10}-\d{1,7})\b' # JIRA URL or just the ticket | ||
| ] | ||
|
|
||
| tickets = set() | ||
| # Preserve first-seen order while de-duplicating, so the MAX_TICKETS cap applied | ||
| # later is deterministic across runs (a plain set would fetch arbitrary tickets). | ||
| seen = set() | ||
| tickets = [] | ||
| for pattern in patterns: | ||
| matches = re.findall(pattern, text) | ||
| matches = re.findall(pattern, text, flags=re.IGNORECASE) | ||
| for match in matches: | ||
| if isinstance(match, tuple): | ||
| # If it's a tuple (from the URL pattern), take the last non-empty group | ||
| ticket = next((m for m in reversed(match) if m), None) | ||
| else: | ||
| ticket = match | ||
| if ticket: | ||
| tickets.add(ticket) | ||
| ticket = ticket.upper() | ||
| if ticket not in seen: | ||
| seen.add(ticket) | ||
| tickets.append(ticket) | ||
|
|
||
| return list(tickets) | ||
| return tickets | ||
|
|
||
|
|
||
| def _get_jira_client(): | ||
| """ | ||
| Build a Jira client from the [jira] settings. Returns None if Jira is not configured. | ||
| Cloud uses email + API token; Server/Data Center uses username + password, or a PAT | ||
| (passed as the token) together with a base url. The REST API version is pinned via | ||
| JIRA_API_VERSION (see its definition for why). | ||
| """ | ||
| base_url = get_settings().get("JIRA.JIRA_BASE_URL", None) | ||
| api_email = get_settings().get("JIRA.JIRA_API_EMAIL", None) | ||
| api_token = get_settings().get("JIRA.JIRA_API_TOKEN", None) | ||
| if not (base_url and api_token): | ||
| # Warn only when Jira is partially configured: some [jira] value is set but the | ||
| # required base_url + api_token pair is incomplete, which is likely a mistake. | ||
| # Stay silent when nothing is set, since that just means Jira is not in use. | ||
| if any([base_url, api_email, api_token]): | ||
| missing = [ | ||
| name for name, value in ( | ||
| ("jira_base_url", base_url), | ||
| ("jira_api_token", api_token), | ||
| ) if not value | ||
| ] | ||
| get_logger().warning( | ||
| f"Jira is partially configured; skipping Jira ticket lookup. Missing: {', '.join(missing)}") | ||
| return None | ||
| url = base_url.rstrip("/") | ||
| try: | ||
| if api_email: | ||
| return Jira(url=url, username=api_email, password=api_token, api_version=JIRA_API_VERSION) | ||
| # No email/username: treat the token as a Server/Data Center PAT. | ||
| return Jira(url=url, token=api_token, api_version=JIRA_API_VERSION) | ||
| except Exception as e: | ||
| get_logger().error(f"Failed to initialize Jira client: {e}", | ||
| artifact={"traceback": traceback.format_exc()}) | ||
| return None | ||
|
Comment on lines
+116
to
+121
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 1. _get_jira_client() catches broad exception New Jira provider-initialization and ticket-fetch logic uses broad except Exception as e and continues/returns None, which can silently mask unexpected failures and make misconfiguration harder to detect. Compliance requires narrow, explicit exception handling (or immediate re-raise with preserved context) in parsing/credential/provider-init paths. Agent Prompt
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Expanding on why I am not narrowing the exception handling here.
Two reasons, one about consistency and one about what the library actually raises. 1. Broad
The new Jira handlers deliberately match that shape. Narrowing only the new code would make this file inconsistent with its own neighbours (and with ~312 2. The library does not give a clean typed contract to narrow to on these paths. If tighter exception handling is wanted, I would suggest it as a separate, file-wide cleanup so the convention stays uniform, rather than singling out the new Jira code. Happy to do that if you would like.
Comment on lines
+116
to
+121
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 1. _get_jira_client() catches exception The new Jira initialization/fetch code uses broad except Exception handlers that swallow unexpected errors by returning None/skipping tickets, which can hide bugs and make failures harder to diagnose consistently. Compliance requires narrow, explicit exception handling (and only broad handlers when immediately re-raising with preserved context). Agent Prompt
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This was addressed in this comment above. |
||
|
|
||
|
|
||
| def extract_jira_tickets(text, max_characters=MAX_TICKET_CHARACTERS): | ||
| """ | ||
| Find Jira ticket keys in the given text and fetch their content. Returns a list of | ||
| ticket dicts in the same shape used by the rest of the ticket-analysis flow. Returns | ||
| an empty list when no keys are found or when Jira is not configured. | ||
| """ | ||
| # Look for keys before building a client: most PRs have none, and building the | ||
| # client first would do needless work (and log a noisy init failure if Jira is | ||
| # misconfigured) even when there is nothing to fetch. | ||
| keys = find_jira_tickets(text or "") | ||
| if not keys: | ||
| return [] | ||
|
|
||
| jira_client = _get_jira_client() | ||
| if jira_client is None: | ||
| return [] | ||
|
|
||
| base_url = get_settings().get("JIRA.JIRA_BASE_URL", "").rstrip("/") | ||
| # Custom field that holds acceptance criteria / requirements. The field id is | ||
| # instance-specific (e.g. "customfield_10127"), so it must be configured; empty | ||
| # means no requirements are extracted. | ||
| requirements_field = get_settings().get("JIRA.JIRA_REQUIREMENTS_FIELD", "") or "" | ||
| if len(keys) > MAX_TICKETS: | ||
| get_logger().info(f"Too many Jira tickets found: {len(keys)}; limiting to {MAX_TICKETS}") | ||
| keys = keys[:MAX_TICKETS] | ||
|
|
||
| tickets_content = [] | ||
| for key in keys: | ||
| try: | ||
| issue = jira_client.issue(key) | ||
| except Exception as e: | ||
| get_logger().warning(f"Failed to fetch Jira ticket {key}: {e}") | ||
| continue | ||
| if not issue: | ||
| continue | ||
|
|
||
| fields = issue.get("fields", {}) or {} | ||
| # The client is pinned to REST v2 (see _get_jira_client), which returns | ||
| # description and rich-text custom fields as plain wiki-markup strings. The | ||
| # isinstance guards below defend against anything non-string (e.g. v3 ADF dicts). | ||
| body = fields.get("description") or "" | ||
| if not isinstance(body, str): | ||
| body = "" | ||
| if len(body) > max_characters: | ||
| body = body[:max_characters] + "..." | ||
|
|
||
| requirements = "" | ||
| if requirements_field: | ||
| requirements = fields.get(requirements_field) or "" | ||
| if not isinstance(requirements, str): | ||
| requirements = "" | ||
| if len(requirements) > max_characters: | ||
| requirements = requirements[:max_characters] + "..." | ||
|
|
||
| labels = fields.get("labels", []) or [] | ||
| tickets_content.append({ | ||
| "ticket_id": key, | ||
| "ticket_url": f"{base_url}/browse/{key}" if base_url else "", | ||
| "title": fields.get("summary", ""), | ||
| "body": body, | ||
| "requirements": requirements, | ||
| "labels": ", ".join(labels), | ||
| }) | ||
| return tickets_content | ||
|
|
||
|
|
||
| def _get_pr_title(git_provider) -> str: | ||
| """Return the PR/MR title across providers (GitHub/Bitbucket use .pr, GitLab .mr).""" | ||
| for attr in ("pr", "mr"): | ||
| obj = getattr(git_provider, attr, None) | ||
| title = getattr(obj, "title", None) | ||
| if title: | ||
| return title | ||
| return "" | ||
|
|
||
|
|
||
| def add_jira_tickets(git_provider, tickets_content): | ||
| """ | ||
| Provider-agnostic Jira lookup. Scans the PR title, description and branch name for | ||
| Jira ticket keys and appends any found tickets to tickets_content (de-duplicated by | ||
| ticket_url). No-op when Jira is not configured. Works for any git provider, since it | ||
| only relies on get_user_description() and get_pr_branch(). | ||
|
|
||
| MAX_TICKETS is the overall per-PR cap, so any provider-native tickets already in | ||
| tickets_content count against it: Jira tickets are appended only until the combined | ||
| total reaches MAX_TICKETS, keeping the existing tickets first. | ||
| """ | ||
| try: | ||
| if len(tickets_content) >= MAX_TICKETS: | ||
| return tickets_content | ||
| jira_context = "\n".join(filter(None, [ | ||
| _get_pr_title(git_provider), | ||
| git_provider.get_user_description() or "", | ||
| git_provider.get_pr_branch() or "", | ||
| ])) | ||
| existing_urls = {t.get("ticket_url") for t in tickets_content} | ||
| for jira_ticket in extract_jira_tickets(jira_context, MAX_TICKET_CHARACTERS): | ||
| if len(tickets_content) >= MAX_TICKETS: | ||
| get_logger().info( | ||
| f"Reached the per-PR cap of {MAX_TICKETS} tickets; skipping remaining Jira tickets") | ||
| break | ||
| if jira_ticket.get("ticket_url") not in existing_urls: | ||
| tickets_content.append(jira_ticket) | ||
| except Exception as e: | ||
| get_logger().error(f"Error extracting Jira tickets: {e}", | ||
| artifact={"traceback": traceback.format_exc()}) | ||
| return tickets_content | ||
|
|
||
|
|
||
| def extract_ticket_links_from_pr_description(pr_description, repo_path, base_url_html='https://github.com'): | ||
|
|
@@ -55,10 +220,10 @@ def extract_ticket_links_from_pr_description(pr_description, repo_path, base_url | |
| if issue_number.isdigit() and len(issue_number) < 5 and repo_path: | ||
| github_tickets.add(f'{base_url_html.strip("/")}/{repo_path}/issues/{issue_number}') | ||
|
|
||
| if len(github_tickets) > 3: | ||
| if len(github_tickets) > MAX_TICKETS: | ||
| get_logger().info(f"Too many tickets found in PR description: {len(github_tickets)}") | ||
| # Limit the number of tickets to 3 | ||
| github_tickets = set(list(github_tickets)[:3]) | ||
| # Limit the number of tickets | ||
| github_tickets = set(list(github_tickets)[:MAX_TICKETS]) | ||
|
Comment on lines
+253
to
+256
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 1. Nondeterministic github_tickets truncation When too many GitHub issue links are found, the code enforces the MAX_TICKETS cap by converting an unordered set to a list and slicing, making which tickets are kept nondeterministic across runs. This can lead to inconsistent related ticket context (and thus inconsistent compliance analysis inputs) and flaky behavior dependent on hash iteration order. Agent Prompt
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. On finding:
This is a real bug, but it is pre-existing in To keep this PR scoped to Jira support, I have split the fix out separately:
Happy to rebase this PR on top of that once it merges if you would prefer the constant rename not to touch that line here in the meantime. |
||
| except Exception as e: | ||
| get_logger().error(f"Error extracting tickets error= {e}", | ||
| artifact={"traceback": traceback.format_exc()}) | ||
|
|
@@ -106,8 +271,9 @@ def extract_ticket_links_from_branch_name(branch_name, repo_path, base_url_html= | |
|
|
||
|
|
||
| async def extract_tickets(git_provider): | ||
| MAX_TICKET_CHARACTERS = 10000 | ||
| try: | ||
| tickets_content = [] | ||
|
|
||
| if isinstance(git_provider, GithubProvider): | ||
| user_description = git_provider.get_user_description() | ||
| description_tickets = extract_ticket_links_from_pr_description( | ||
|
|
@@ -123,12 +289,11 @@ async def extract_tickets(git_provider): | |
| if link not in seen: | ||
| seen.add(link) | ||
| merged.append(link) | ||
| if len(merged) > 3: | ||
| if len(merged) > MAX_TICKETS: | ||
| get_logger().info(f"Too many tickets (description + branch): {len(merged)}") | ||
| tickets = merged[:3] | ||
| tickets = merged[:MAX_TICKETS] | ||
| else: | ||
| tickets = merged | ||
| tickets_content = [] | ||
|
|
||
| if tickets: | ||
|
|
||
|
|
@@ -188,11 +353,8 @@ async def extract_tickets(git_provider): | |
| 'sub_issues': sub_issues_content # Store sub-issues content | ||
| }) | ||
|
|
||
| return tickets_content | ||
|
|
||
| elif isinstance(git_provider, AzureDevopsProvider): | ||
| tickets_info = git_provider.get_linked_work_items() | ||
| tickets_content = [] | ||
| for ticket in tickets_info: | ||
| try: | ||
| ticket_body_str = ticket.get("body", "") | ||
|
|
@@ -214,7 +376,13 @@ async def extract_tickets(git_provider): | |
| f"Error processing Azure DevOps ticket: {e}", | ||
| artifact={"traceback": traceback.format_exc()}, | ||
| ) | ||
| return tickets_content | ||
|
|
||
| # Provider-agnostic Jira lookup. Tickets are often referenced by key in the PR | ||
| # title, description or branch name rather than via a provider-native link, so | ||
| # this runs for every provider and is a no-op when Jira is not configured. | ||
| add_jira_tickets(git_provider, tickets_content) | ||
|
|
||
| return tickets_content | ||
|
qodo-free-for-open-source-projects[bot] marked this conversation as resolved.
Comment on lines
+418
to
+423
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 1. Blocking jira i/o in async extract_tickets() is async but calls add_jira_tickets()/extract_jira_tickets() which perform synchronous Jira HTTP requests (jira_client.issue) inline. This blocks the event loop during review execution and can reduce FastAPI throughput / cause latency spikes under concurrent reviews. Agent Prompt
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Accurate that Verified against
So the event loop is already blocked on synchronous ticket I/O for the existing GitHub and Azure providers; the Jira call added here is consistent with that. Wrapping only the Jira call in A proper fix — offloading all the ticket I/O in |
||
|
|
||
| except Exception as e: | ||
| get_logger().error(f"Error extracting tickets error= {e}", | ||
|
|
||
Uh oh!
There was an error while loading. Please reload this page.