diff --git a/tools/generate_meta.py b/tools/generate_meta.py index 2306081..099704f 100755 --- a/tools/generate_meta.py +++ b/tools/generate_meta.py @@ -17,7 +17,6 @@ import argparse import base64 import logging import pathlib -import re import requests import subprocess import sys @@ -32,16 +31,8 @@ data = otc_metadata.services.Services() api_session = requests.Session() -def remove_thinking_content(text): - """Remove thinking process content between thinking markers.""" - # Remove everything between and markers - text = re.sub(r'(?is).*?', '', text, flags=re.DOTALL | re.IGNORECASE) - return text.strip() - - def extract_description(result): """Extract description from API response and clean it.""" - # Chat completion format: choices[0].message.content if "choices" in result and len(result["choices"]) > 0: message = result["choices"][0].get("message", {}) description = message.get("content", "") @@ -52,7 +43,6 @@ def extract_description(result): else: return None - description = remove_thinking_content(description) description = description.strip() if not description or description.isspace(): @@ -125,16 +115,20 @@ def generate_description_with_llm(text, service_title, llm_api_url, model_name, except (KeyError, ValueError, IndexError) as e: logging.warning(f"Attempt {attempt + 1}: LLM API response parsing failed: {e}. Retrying...") - # After all retries failed, use fallback - logging.warning("All LLM API retries failed. Using fallback description.") + # After all retries failed, use fallback - extract first headline + logging.warning("All LLM API retries failed. Using fallback description from first headline.") lines = text.split("\n") - for line in lines: - line = line.strip() - if line and not line.startswith("-") and not line.startswith("#"): - first_sentence = line.split(".")[0] + "." - if len(first_sentence) > 160: - first_sentence = first_sentence[:157] + "..." - return first_sentence + for i, line in enumerate(lines): + line_stripped = line.strip() + if line_stripped and not line_stripped.startswith("-") and not line_stripped.startswith("#"): + # Check if next line is a headline underline (=== or ---) + if i + 1 < len(lines): + next_line = lines[i + 1].strip() + if next_line and all(c in "=-" for c in next_line): + description = line_stripped + if len(description) > 160: + description = description[:157] + "..." + return description return f"{service_title} documentation"