def extract_description(result):
    """Extract and normalize a meta description from an LLM API response.

    Supports three response shapes, tried in order:
      * chat-completions format: ``choices[0].message.content``
      * llama.cpp ``/completion`` format: ``response``
      * plain ``text`` field

    Returns the first sentence of the cleaned text, capped at 160
    characters, or ``None`` when no usable text is present.
    """
    # The parsed JSON may be anything (null, list, string); only a dict
    # can match the formats below.
    if not isinstance(result, dict):
        return None

    # Chat completion format: choices[0].message.content
    if "choices" in result and len(result["choices"]) > 0:
        # "message" and "content" may be present but explicitly null
        # (e.g. tool-call responses) -- coerce to safe empties.
        message = result["choices"][0].get("message") or {}
        description = message.get("content") or ""
    elif "response" in result:
        description = result["response"]
    elif "text" in result:
        description = result["text"]
    else:
        return None

    description = description.strip()
    if not description:
        # Nothing usable; skip the thinking-content pass entirely.
        return None

    description = remove_thinking_content(description).strip()
    if not description:
        return None

    # Meta descriptions should be a single sentence.
    first_sentence = description.split(".")[0].strip() + "."
    if len(first_sentence) <= 1:
        # No text before the first period; fall back to a plain prefix.
        first_sentence = description[:160].strip() + "."
    if len(first_sentence) > 160:
        first_sentence = first_sentence[:157] + "..."
    return first_sentence
" - f"Do not include any markdown formatting, quotes, or meta-commentary.\n\n" - f"Content:\n{text[:2000]}\n\n" - f"Meta description:" + f"Generate a meta description (40-160 chars) for: {service_title}." + f"Content preview: {content_preview}." + f"Output ONLY the description text, nothing else." ) - try: - headers = {"Content-Type": "application/json"} - if api_username and api_password: - credentials = f"{api_username}:{api_password}" - encoded_credentials = base64.b64encode(credentials.encode()).decode() - headers["Authorization"] = f"Basic {encoded_credentials}" + headers = {"Content-Type": "application/json"} + if api_username and api_password: + credentials = f"{api_username}:{api_password}" + encoded_credentials = base64.b64encode(credentials.encode()).decode() + headers["Authorization"] = f"Basic {encoded_credentials}" - response = requests.post( - llm_api_url, - json={ - "prompt": prompt, - "model": model_name, - "temperature": 0.2, + # Try up to 3 times + for attempt in range(3): + try: + response = requests.post( + llm_api_url, + json={ + "messages": [ + {"role": "user", "content": prompt}, + ], + "model": model_name, + "temperature": 0.2, - "repeat_last_n": 128, - "repeat_penalty": 1.15, - "presence_penalty": 0.2, - "frequency_penalty": 0.2, + "top_k": 40, + "top_p": 0.9, + "min_p": 0.05, - # optional DRY anti-looping (try only if it still loops) - "dry_multiplier": 0.5, - "dry_base": 1.75, - "dry_allowed_length": 2, + "repeat_last_n": 256, + "repeat_penalty": 1.18, + "presence_penalty": 0.2, + "frequency_penalty": 0.2, - }, - headers=headers, - timeout=15, - ) - response.raise_for_status() - result = response.json() - if "choices" in result and len(result["choices"]) > 0: - description = result["choices"][0]["text"].strip() - description = remove_thinking_content(description) - # If description is empty or just whitespace, use fallback - if not description or description.isspace(): - return f"{service_title} documentation" - # Extract only the first 
sentence (meta description should be one sentence) - parts = description.split(".") - first_sentence = parts[0].strip() + "." - if len(first_sentence) <= 1: - first_sentence = description[:160].strip() + "." - if len(first_sentence) > 160: - first_sentence = first_sentence[:157] + "..." - return first_sentence - elif "response" in result: - description = result["response"].strip() - description = remove_thinking_content(description) - if not description or description.isspace(): - return f"{service_title} documentation" - first_sentence = description.split(".")[0].strip() + "." - if len(first_sentence) <= 1: - first_sentence = description[:160].strip() + "." - if len(first_sentence) > 160: - first_sentence = first_sentence[:157] + "..." - return first_sentence - elif isinstance(result, dict) and "text" in result: - description = result["text"].strip() - description = remove_thinking_content(description) - if not description or description.isspace(): - return f"{service_title} documentation" - first_sentence = description.split(".")[0].strip() + "." - if len(first_sentence) <= 1: - first_sentence = description[:160].strip() + "." - if len(first_sentence) > 160: - first_sentence = first_sentence[:157] + "..." - return first_sentence - except requests.exceptions.RequestException as e: - logging.warning(f"LLM API request failed: {e}. Using fallback description.") - except (KeyError, ValueError, IndexError) as e: - logging.warning(f"LLM API response parsing failed: {e}. 
Using fallback description.") + "dry_multiplier": 0.8, + "dry_base": 1.75, + "dry_allowed_length": 2, + "dry_penalty_last_n": -1, + "chat_template_kwargs": {"enable_thinking": False}, + }, + headers=headers, + timeout=15, + ) + response.raise_for_status() + result = response.json() + description = extract_description(result) + if description: + return description + logging.warning(f"Attempt {attempt + 1}: Empty or invalid response from LLM API.") + except requests.exceptions.RequestException as e: + logging.warning(f"Attempt {attempt + 1}: LLM API request failed: {e}. Retrying...") + except (KeyError, ValueError, IndexError) as e: + logging.warning(f"Attempt {attempt + 1}: LLM API response parsing failed: {e}. Retrying...") - # Fallback: Extract first sentence from content + # After all retries failed, use fallback + logging.warning("All LLM API retries failed. Using fallback description.") lines = text.split("\n") for line in lines: line = line.strip() @@ -324,8 +328,8 @@ def main(): parser.add_argument("--token", metavar="token", help="API token") parser.add_argument( "--llm-api-url", - default="http://localhost:8080/v1/completions", - help="URL of the LLM API server. Default: http://localhost:8080/v1/completions", + default="http://localhost:8080/v1/chat/completions", + help="URL of the LLM API server. Default: http://localhost:8080/v1/chat/completions", ) parser.add_argument( "--llm-model",