forked from docs/doc-exports
150 lines
4.6 KiB
Python
Executable File
150 lines
4.6 KiB
Python
Executable File
#!/usr/bin/env python3
|
|
"""
Check that newly added HTML files are registered in both metadata files.

This script validates that every new HTML file added in a PR is properly
registered in both CLASS.TXT.json and ALL_META.TXT.json in the same directory.

Directory structure:
    docs/vpc/api-ref/
    ├── CLASS.TXT.json       # Contains uri references to HTML files
    ├── ALL_META.TXT.json    # Contains uri references to HTML files
    └── en-us_topic_XXXXX.html

Both metadata files use the "uri" field to reference HTML filenames. When a
new HTML file is added, it must be added to both metadata files.

Checks performed:
    1. Does CLASS.TXT.json exist in the HTML file's directory?
    2. Does ALL_META.TXT.json exist in the HTML file's directory?
    3. Is the HTML filename listed in CLASS.TXT.json (in uri field)?
    4. Is the HTML filename listed in ALL_META.TXT.json (in uri field)?

Usage:
    Set ADDED_FILES environment variable with space-separated list of added HTML files.
    Exits with code 1 if violations found, 0 otherwise.
    Writes violations to violations.json for comment generation.
"""
|
|
|
|
import sys
|
|
import os
|
|
import json
|
|
|
|
|
|
def get_uris_from_metadata(json_path):
    """Collect every non-empty "uri" string from a metadata JSON file.

    The file is expected to contain a JSON array of objects, each of which
    may carry a "uri" key. Entries are validated one at a time, so a single
    malformed entry (not an object, or a "uri" that is not a string) is
    skipped instead of discarding the valid entries that follow it.

    Args:
        json_path: Path to the metadata JSON file.

    Returns:
        A set of the uri strings found. The set is empty when the file
        cannot be read or parsed, or when its top-level value is not an
        array; a warning is printed in each of those cases.
    """
    # Keep the try body limited to reading and parsing, so that problems
    # with individual entries are handled explicitly below instead of being
    # swallowed by a catch-all handler.
    try:
        with open(json_path, "r", encoding="utf-8") as f:
            data = json.load(f)
    except json.JSONDecodeError as e:
        print(f"Warning: Error parsing {json_path}: {e}")
        return set()
    except (OSError, ValueError, RecursionError) as e:
        # ValueError covers UnicodeDecodeError raised while decoding the file.
        print(f"Warning: Error reading {json_path}: {e}")
        return set()

    if not isinstance(data, list):
        print(
            f"Warning: Error reading {json_path}: expected a JSON array, "
            f"got {type(data).__name__}"
        )
        return set()

    uris = set()
    malformed = 0
    for entry in data:
        if not isinstance(entry, dict):
            malformed += 1
            continue
        uri = entry.get("uri", "")
        if not uri:
            # A missing or empty uri is simply "nothing to register".
            continue
        if isinstance(uri, str):
            uris.add(uri)
        else:
            malformed += 1

    if malformed:
        print(f"Warning: Skipped {malformed} malformed entries in {json_path}")

    return uris
|
|
|
|
|
|
def check_html_file_registration(html_file, base_dir):
    """
    Check if an HTML file is registered in both metadata files.

    The metadata files (CLASS.TXT.json and ALL_META.TXT.json) are looked up
    in the directory that actually contains ``html_file``. The paths placed
    in the result are expressed relative to ``base_dir``.

    Args:
        html_file: Path to the HTML file (absolute, or relative to the
            current working directory)
        base_dir: Base directory of the repository; only used to compute
            the relative paths reported in the result

    Returns:
        dict with:
        - 'file': HTML file path relative to base_dir
        - 'missing_from': list of metadata files it's missing from; a
          metadata file that does not exist is listed with the suffix
          " (file not found)"
        - 'directory': directory containing the HTML file, relative to
          base_dir
    """
    rel_path = os.path.relpath(html_file, base_dir)
    dir_path = os.path.dirname(rel_path)
    filename = os.path.basename(rel_path)

    result = {"file": rel_path, "missing_from": [], "directory": dir_path}

    # Probe next to the HTML file itself. Joining the base_dir-relative
    # directory onto the metadata name would be resolved against the current
    # working directory, which is only correct when base_dir == cwd.
    metadata_dir = os.path.dirname(html_file)

    for metadata_name in ("CLASS.TXT.json", "ALL_META.TXT.json"):
        metadata_path = os.path.join(metadata_dir, metadata_name)
        if not os.path.exists(metadata_path):
            result["missing_from"].append(f"{metadata_name} (file not found)")
        elif filename not in get_uris_from_metadata(metadata_path):
            result["missing_from"].append(metadata_name)

    return result
|
|
|
|
|
|
def main():
    """Validate the files named in ADDED_FILES and exit with a status code.

    Reads a whitespace-separated list of paths from the ADDED_FILES
    environment variable and checks each existing file with
    check_html_file_registration, using the current working directory as
    the repository root. Paths that do not exist are reported with a
    warning and skipped.

    Exits with status 0 when there is nothing to check or nothing is
    missing. Otherwise writes the collected results to violations.json,
    prints a summary and exits with status 1.
    """
    repo_root = os.getcwd()

    # str.split() with no separator already discards empty and
    # whitespace-only tokens.
    candidates = os.environ.get("ADDED_FILES", "").split()

    if not candidates:
        print("No HTML files added in this PR")
        sys.exit(0)

    print(f"Checking {len(candidates)} added HTML file(s)")

    unregistered = []
    for path in candidates:
        if os.path.exists(path):
            report = check_html_file_registration(path, repo_root)
            if report["missing_from"]:
                unregistered.append(report)
        else:
            print(f"Warning: File not found: {path}")

    if not unregistered:
        print("\nAll added HTML files are properly registered in metadata")
        sys.exit(0)

    # Persist the findings so a later step can build a comment from them.
    with open("violations.json", "w") as out:
        json.dump(unregistered, out, indent=2)

    print(f"\nFound {len(unregistered)} HTML file(s) not properly registered:")
    for report in unregistered:
        print(f" {report['file']}")
        for where in report["missing_from"]:
            print(f" ❌ Missing from: {where}")

    sys.exit(1)
|
|
|
|
|
|
# Run the check only when executed as a script, so importing this module
# has no side effects.
if __name__ == "__main__":
    main()
|