diff --git a/.gitea/workflows/create-weekly-analytics-stats.yaml b/.gitea/workflows/create-weekly-analytics-stats.yaml
new file mode 100644
index 0000000..a684238
--- /dev/null
+++ b/.gitea/workflows/create-weekly-analytics-stats.yaml
@@ -0,0 +1,62 @@
+name: Create Weekly Analytics Stats
+
+on:
+  schedule:
+    # Every Monday at 03:00 UTC (04:00 CET)
+    - cron: "0 3 * * 1"
+  workflow_dispatch:
+
+jobs:
+  run-analytics:
+    runs-on: ubuntu
+
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v4
+        with:
+          token: ${{ secrets.GITHUB_TOKEN }} # for push permission
+
+      - name: Set up Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: "3.12"
+
+      - name: Install dependencies
+        run: |
+          python -m pip install --upgrade pip
+          pip install requests otc-metadata
+
+      - name: Run analytics for eu_de
+        env:
+          UMAMI_USERNAME: ${{ secrets.UMAMI_USERNAME }}
+          UMAMI_PASSWORD: ${{ secrets.UMAMI_PASSWORD }}
+        run: |
+          chmod +x ./tools/collect_statistics.py
+          ./tools/collect_statistics.py \
+            --website-id "${{ secrets.UMAMI_WEBSITE_ID }}" \
+            --cloud-environment "eu_de" \
+            --environment "public" \
+            --limit "10"
+
+      - name: Run analytics for swiss
+        env:
+          UMAMI_USERNAME: ${{ secrets.UMAMI_USERNAME }}
+          UMAMI_PASSWORD: ${{ secrets.UMAMI_PASSWORD }}
+        run: |
+          ./tools/collect_statistics.py \
+            --website-id "${{ secrets.UMAMI_WEBSITE_ID }}" \
+            --cloud-environment "swiss" \
+            --environment "public" \
+            --limit "10"
+
+      - name: Commit and push results
+        run: |
+          git config --global user.name "gitea-actions[bot]"
+          git config --global user.email "actions@users.noreply.local"
+          git add analytics/
+          if git diff --cached --quiet; then
+            echo "No changes to commit"
+          else
+            git commit -m "chore: update analytics data [skip ci]"
+            git push
+          fi
diff --git a/tools/collect_statistics.py b/tools/collect_statistics.py
new file mode 100644
index 0000000..ca93948
--- /dev/null
+++ b/tools/collect_statistics.py
@@ -0,0 +1,188 @@
+#!/usr/bin/env python3
+
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+# implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
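+
+# Collects page-view statistics from a Umami analytics instance for the
+# Open Telekom Cloud documentation and stores the most-viewed service
+# types as JSON under analytics/<environment>/<cloud_environment>.json.
+#
+# Example invocation (illustrative; UMAMI_USERNAME and UMAMI_PASSWORD
+# must be set in the environment, and the website ID is a placeholder):
+#
+#   ./tools/collect_statistics.py \
+#       --website-id "<umami-website-uuid>" \
+#       --cloud-environment eu_de \
+#       --environment public \
+#       --limit 10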
+
+import argparse
+import json
+import logging
+import os
+from datetime import datetime, timedelta, timezone
+
+import otc_metadata.services
+import requests
+
+# ===== Configuration =====
+USERNAME = os.getenv("UMAMI_USERNAME")
+PASSWORD = os.getenv("UMAMI_PASSWORD")
+
+# ===== Data =====
+# Service types excluded from the results.
+blacklist = [
+    "ed",
+    "sd"
+]
+
+# ===== Logger =====
+logging.basicConfig(
+    level=logging.INFO,
+    format="%(asctime)s - %(levelname)s - %(message)s",
+)
+logger = logging.getLogger(__name__)
+
+
+def parse_args():
+    """Parse command-line arguments."""
+    parser = argparse.ArgumentParser(description="Analytics script")
+    parser.add_argument(
+        "--base-url",
+        default="https://analytics.otc-service.com",
+        help="Base URL of the analytics server"
+    )
+    parser.add_argument(
+        "--cloud-environment",
+        default="eu_de",
+        choices=["eu_de", "swiss"],
+        help="Cloud environment (default: eu_de)"
+    )
+    parser.add_argument(
+        "--environment",
+        default=["public"],
+        nargs="+",
+        choices=["public", "internal", "hidden"],
+        help="Environments (default: public)"
+    )
+    parser.add_argument(
+        "--limit",
+        type=int,
+        default=10,
+        help="Maximum number of results (default: 10)"
+    )
+    parser.add_argument(
+        "--website-id",
+        required=True,
+        help="Umami website ID"
+    )
+
+    return parser.parse_args()
+
+
+def get_umami_token(base_url):
+    """Fetch a bearer token from the Umami API."""
+    if not USERNAME or not PASSWORD:
+        raise RuntimeError("UMAMI_USERNAME and UMAMI_PASSWORD must be set")
+    url = f"{base_url}/api/auth/login"
+    response = requests.post(url, json={"username": USERNAME, "password": PASSWORD}, timeout=30)
+    response.raise_for_status()
+    return response.json().get("token")
+
+
+def get_4_weeks_range():
+    """Calculate start and end of the last four weeks as UNIX timestamps."""
+    end_date = datetime.now(timezone.utc)
+    start_date = end_date - timedelta(weeks=4)
+
+    # Umami expects millisecond timestamps.
+    start_ts = int(start_date.timestamp() * 1000)
+    end_ts = int(end_date.timestamp() * 1000)
+    return start_ts, end_ts
+
+
+def fetch_pageviews(token, start_ts, end_ts, website_id, base_url):
+    """Retrieve per-URL page-view metrics from the Umami API."""
+    headers = {"Authorization": f"Bearer {token}"}
+    url = f"{base_url}/api/websites/{website_id}/metrics"
+    params = {
+        "type": "url",
+        "startAt": start_ts,
+        "endAt": end_ts
+    }
+
+    response = requests.get(url, headers=headers, params=params, timeout=30)
+    response.raise_for_status()
+    return response.json()
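+
+
+# The metrics endpoint ("type": "url") returns a JSON array of objects of
+# the form {"x": "<url path>", "y": <view count>} (shape illustrative).
+# Only the URL path in "x" is used below to map page views onto service types.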
+def filter_unique_service_types(stats, cloud_environment, environment, limit):
+    """
+    Filter the stats and return unique service_type values that exist in the
+    metadata, skipping blacklisted service types and limiting the result to
+    `limit` entries.
+    """
+    services = otc_metadata.services.Services().all_services_by_cloud_environment(
+        cloud_environment=cloud_environment,
+        environments=environment
+    )
+
+    # Map service_uri -> service_type
+    uri_to_type = {s["service_uri"]: s["service_type"] for s in services}
+
+    seen_types = set()
+    filtered = []
+
+    for entry in stats:
+        url_path = entry["x"]
+
+        for service_uri, service_type in uri_to_type.items():
+            if f"/{service_uri}" in url_path and service_type not in seen_types:
+                if service_type in blacklist:
+                    continue
+
+                filtered.append(service_type)
+                seen_types.add(service_type)
+
+                if len(filtered) >= limit:
+                    return filtered
+                # Each URL maps to at most one service type.
+                break
+
+    return filtered
+
+
+def save_to_file(data, environment, cloud_environment):
+    """Save the data to ./analytics/<environment>/<cloud_environment>.json."""
+    folder = os.path.join("analytics", environment)
+    os.makedirs(folder, exist_ok=True)
+
+    filename = os.path.join(folder, f"{cloud_environment}.json")
+    abs_path = os.path.abspath(filename)
+
+    with open(filename, "w", encoding="utf-8") as f:
+        json.dump(data, f, indent=2, ensure_ascii=False)
+
+    logger.info(f"✅ Data saved in: {abs_path}")
+
+
+def main():
+    try:
+        args = parse_args()
+        token = get_umami_token(base_url=args.base_url)
+        start_ts, end_ts = get_4_weeks_range()
+        stats = fetch_pageviews(token, start_ts, end_ts, website_id=args.website_id, base_url=args.base_url)
+        filtered_stats = filter_unique_service_types(
+            stats=stats,
+            cloud_environment=args.cloud_environment,
+            environment=args.environment,
+            limit=args.limit
+        )
+        save_to_file(
+            data=filtered_stats,
+            # Results are stored under the first requested environment.
+            environment=args.environment[0],
+            cloud_environment=args.cloud_environment
+        )
+    except Exception as e:
+        logger.error(f"Error: {e}")
+        raise
+
+
+if __name__ == "__main__":
+    main()
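+
+# Resulting file (illustrative): analytics/public/eu_de.json, containing a
+# JSON array of service types, e.g. ["ecs", "obs", "vpc"].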