def extract_description(result):
    """Extract and normalize a meta description from an LLM API response.

    Supports three response shapes, tried in order:
      * chat-completions format: ``choices[0].message.content``
      * llama.cpp ``/completion`` format: ``response``
      * plain ``text`` field

    Returns the first sentence of the cleaned text, capped at 160
    characters, or ``None`` when no usable text is present.
    """
    # The parsed JSON may be anything (null, list, string); only a dict
    # can match the formats below.
    if not isinstance(result, dict):
        return None

    # Chat completion format: choices[0].message.content
    if "choices" in result and len(result["choices"]) > 0:
        # "message" and "content" may be present but explicitly null
        # (e.g. tool-call responses) -- coerce to safe empties.
        message = result["choices"][0].get("message") or {}
        description = message.get("content") or ""
    elif "response" in result:
        description = result["response"]
    elif "text" in result:
        description = result["text"]
    else:
        return None

    description = description.strip()
    if not description:
        # Nothing usable; skip the thinking-content pass entirely.
        return None

    description = remove_thinking_content(description).strip()
    if not description:
        return None

    # Meta descriptions should be a single sentence.
    first_sentence = description.split(".")[0].strip() + "."
    if len(first_sentence) <= 1:
        # No text before the first period; fall back to a plain prefix.
        first_sentence = description[:160].strip() + "."
    if len(first_sentence) > 160:
        first_sentence = first_sentence[:157] + "..."
    return first_sentence
" - f"Do not include any markdown formatting, quotes, or meta-commentary.\n\n" - f"Content:\n{text[:2000]}\n\n" - f"Meta description:" + f"Generate a meta description (40-160 chars) for: {service_title}." + f"Content preview: {content_preview}." + f"Output ONLY the description text, nothing else." ) - try: - headers = {"Content-Type": "application/json"} - if api_username and api_password: - credentials = f"{api_username}:{api_password}" - encoded_credentials = base64.b64encode(credentials.encode()).decode() - headers["Authorization"] = f"Basic {encoded_credentials}" + headers = {"Content-Type": "application/json"} + if api_username and api_password: + credentials = f"{api_username}:{api_password}" + encoded_credentials = base64.b64encode(credentials.encode()).decode() + headers["Authorization"] = f"Basic {encoded_credentials}" - response = requests.post( - llm_api_url, - json={ - "prompt": prompt, - "model": model_name, - "temperature": 0.2, + # Try up to 3 times + for attempt in range(3): + try: + response = requests.post( + llm_api_url, + json={ + "messages": [ + {"role": "user", "content": prompt}, + ], + "model": model_name, + "temperature": 0.2, - "repeat_last_n": 128, - "repeat_penalty": 1.15, - "presence_penalty": 0.2, - "frequency_penalty": 0.2, + "top_k": 40, + "top_p": 0.9, + "min_p": 0.05, - # optional DRY anti-looping (try only if it still loops) - "dry_multiplier": 0.5, - "dry_base": 1.75, - "dry_allowed_length": 2, + "repeat_last_n": 256, + "repeat_penalty": 1.18, + "presence_penalty": 0.2, + "frequency_penalty": 0.2, - }, - headers=headers, - timeout=15, - ) - response.raise_for_status() - result = response.json() - if "choices" in result and len(result["choices"]) > 0: - description = result["choices"][0]["text"].strip() - description = remove_thinking_content(description) - # If description is empty or just whitespace, use fallback - if not description or description.isspace(): - return f"{service_title} documentation" - # Extract only the first 
sentence (meta description should be one sentence) - parts = description.split(".") - first_sentence = parts[0].strip() + "." - if len(first_sentence) <= 1: - first_sentence = description[:160].strip() + "." - if len(first_sentence) > 160: - first_sentence = first_sentence[:157] + "..." - return first_sentence - elif "response" in result: - description = result["response"].strip() - description = remove_thinking_content(description) - if not description or description.isspace(): - return f"{service_title} documentation" - first_sentence = description.split(".")[0].strip() + "." - if len(first_sentence) <= 1: - first_sentence = description[:160].strip() + "." - if len(first_sentence) > 160: - first_sentence = first_sentence[:157] + "..." - return first_sentence - elif isinstance(result, dict) and "text" in result: - description = result["text"].strip() - description = remove_thinking_content(description) - if not description or description.isspace(): - return f"{service_title} documentation" - first_sentence = description.split(".")[0].strip() + "." - if len(first_sentence) <= 1: - first_sentence = description[:160].strip() + "." - if len(first_sentence) > 160: - first_sentence = first_sentence[:157] + "..." - return first_sentence - except requests.exceptions.RequestException as e: - logging.warning(f"LLM API request failed: {e}. Using fallback description.") - except (KeyError, ValueError, IndexError) as e: - logging.warning(f"LLM API response parsing failed: {e}. 
Using fallback description.") + "dry_multiplier": 0.8, + "dry_base": 1.75, + "dry_allowed_length": 2, + "dry_penalty_last_n": -1, + "chat_template_kwargs": {"enable_thinking": False}, + }, + headers=headers, + timeout=15, + ) + response.raise_for_status() + result = response.json() + description = extract_description(result) + if description: + return description + logging.warning(f"Attempt {attempt + 1}: Empty or invalid response from LLM API.") + except requests.exceptions.RequestException as e: + logging.warning(f"Attempt {attempt + 1}: LLM API request failed: {e}. Retrying...") + except (KeyError, ValueError, IndexError) as e: + logging.warning(f"Attempt {attempt + 1}: LLM API response parsing failed: {e}. Retrying...") - # Fallback: Extract first sentence from content + # After all retries failed, use fallback + logging.warning("All LLM API retries failed. Using fallback description.") lines = text.split("\n") for line in lines: line = line.strip() @@ -324,8 +328,8 @@ def main(): parser.add_argument("--token", metavar="token", help="API token") parser.add_argument( "--llm-api-url", - default="http://localhost:8080/v1/completions", - help="URL of the LLM API server. Default: http://localhost:8080/v1/completions", + default="http://localhost:8080/v1/chat/completions", + help="URL of the LLM API server. Default: http://localhost:8080/v1/chat/completions", ) parser.add_argument( "--llm-model",