# Home-AssistantConfig/config/packages/proxmox.yaml

######################################################################
# @CCOSTAN - Follow Me on X
# For more info visit https://www.vcloudinfo.com/click-here
# Original Repo : https://github.com/CCOSTAN/Home-AssistantConfig
# -------------------------------------------------------------------
# Proxmox Host Automations - reboots, repairs, and Joanna dispatch
#  Nightly Frigate host reboot plus update/runtime/disk health automations.
# -------------------------------------------------------------------
# Related Issue: 1584
# Notes: Creates HA repair issues when proxmox nodes report updates.
# Notes: Adds normalized runtime + disk health signals for dashboard/alerts.
# Notes: Joanna dispatch is reserved for sustained runtime and disk-pressure degradations.
# Notes: Normalized disk usage sensors expose state_class for long-term trend rollups.
######################################################################
template:
  - sensor:
      # Normalized disk usage percentage for the Proxmox1 node.
      # Prefers the node's own percentage sensor; when that has no usable
      # state, derives the value from the used / max disk sensors instead.
      - name: "Proxmox1 Disk Used Percentage"
        unique_id: proxmox1_disk_used_percentage
        unit_of_measurement: "%"
        state_class: measurement
        icon: mdi:harddisk
        # Available when the percentage sensor is usable, or when both
        # fallback sensors are usable and the max-disk value is positive.
        availability: >-
          {% set invalid = ['unknown', 'unavailable', 'none', ''] %}
          {% set pct_raw = states('sensor.node_proxmox1_disk_used_percentage') %}
          {% set used_raw = states('sensor.node_proxmox1_disk') %}
          {% set total_raw = states('sensor.node_proxmox1_max_disk') %}
          {% if pct_raw not in invalid %}
            {{ true }}
          {% else %}
            {{ used_raw not in invalid and total_raw not in invalid and (total_raw | float(0)) > 0 }}
          {% endif %}
        # Always rounded to one decimal; reports 0 when no total is known.
        state: >-
          {% set invalid = ['unknown', 'unavailable', 'none', ''] %}
          {% set pct_raw = states('sensor.node_proxmox1_disk_used_percentage') %}
          {% if pct_raw in invalid %}
            {% set used_amount = states('sensor.node_proxmox1_disk') | float(0) %}
            {% set total_amount = states('sensor.node_proxmox1_max_disk') | float(0) %}
            {{ ((used_amount / total_amount) * 100) | round(1) if total_amount > 0 else 0 }}
          {% else %}
            {{ pct_raw | float(0) | round(1) }}
          {% endif %}

      # Normalized disk usage percentage for the Proxmox02 node.
      # Prefers the node's own percentage sensor; when that has no usable
      # state, derives the value from the used / max disk sensors instead.
      - name: "Proxmox02 Disk Used Percentage"
        unique_id: proxmox02_disk_used_percentage
        unit_of_measurement: "%"
        state_class: measurement
        icon: mdi:harddisk
        # Available when the percentage sensor is usable, or when both
        # fallback sensors are usable and the max-disk value is positive.
        availability: >-
          {% set invalid = ['unknown', 'unavailable', 'none', ''] %}
          {% set pct_raw = states('sensor.node_proxmox02_disk_used_percentage') %}
          {% set used_raw = states('sensor.node_proxmox02_disk') %}
          {% set total_raw = states('sensor.node_proxmox02_max_disk') %}
          {% if pct_raw not in invalid %}
            {{ true }}
          {% else %}
            {{ used_raw not in invalid and total_raw not in invalid and (total_raw | float(0)) > 0 }}
          {% endif %}
        # Always rounded to one decimal; reports 0 when no total is known.
        state: >-
          {% set invalid = ['unknown', 'unavailable', 'none', ''] %}
          {% set pct_raw = states('sensor.node_proxmox02_disk_used_percentage') %}
          {% if pct_raw in invalid %}
            {% set used_amount = states('sensor.node_proxmox02_disk') | float(0) %}
            {% set total_amount = states('sensor.node_proxmox02_max_disk') | float(0) %}
            {{ ((used_amount / total_amount) * 100) | round(1) if total_amount > 0 else 0 }}
          {% else %}
            {{ pct_raw | float(0) | round(1) }}
          {% endif %}

  - binary_sensor:
      # Normalized "node is running" signal for Proxmox1.
      # Uses the binary status sensor when it reports on/off; otherwise
      # falls back to matching the text status sensor against known
      # "up" values.
      - name: "Proxmox1 Runtime Healthy"
        unique_id: proxmox1_runtime_healthy
        device_class: running
        state: >-
          {% set direct = states('binary_sensor.node_proxmox1_status') %}
          {% if direct == 'on' %}
            {{ true }}
          {% elif direct == 'off' %}
            {{ false }}
          {% else %}
            {{ (states('sensor.node_proxmox1_status') | lower) in ['online', 'running', 'on'] }}
          {% endif %}

      # Normalized "node is running" signal for Proxmox02.
      # Uses the binary status sensor when it reports on/off; otherwise
      # falls back to matching the text status sensor against known
      # "up" values.
      - name: "Proxmox02 Runtime Healthy"
        unique_id: proxmox02_runtime_healthy
        device_class: running
        state: >-
          {% set direct = states('binary_sensor.node_proxmox02_status') %}
          {% if direct == 'on' %}
            {{ true }}
          {% elif direct == 'off' %}
            {{ false }}
          {% else %}
            {{ (states('sensor.node_proxmox02_status') | lower) in ['online', 'running', 'on'] }}
          {% endif %}

automation:
  # Presses the reboot button entity for the Frigate guest every day at
  # 05:00 and records the event through the shared logbook script.
  - id: nightly_frigate_reboot
    alias: "Nightly Frigate Server Reboot"
    description: "Reboots the Frigate server every day at 5 AM"
    mode: single
    trigger:
      - at: "05:00:00"
        platform: time
    action:
      # Step 1: request the reboot.
      - target:
          entity_id: button.qemu_docker2_101_reboot
        service: button.press
      # Step 2: log it (written right after the press, not after the guest
      # has come back up).
      - data:
          topic: "FRIGATE"
          message: "Frigate server rebooted at 5 AM."
        service: script.send_to_logbook

  # Opens a Repairs issue while a Proxmox node reports pending package
  # updates, and clears it (plus writes a logbook entry) on any other state.
  # NOTE(review): repairs.create / repairs.remove are not core services;
  # presumably provided by a custom integration (e.g. Spook) - confirm.
  - alias: "Proxmox Updates Repair Issues"
    id: proxmox_updates_repair
    description: "Track repair issues when Proxmox hosts report updates."
    # queued instead of restart: both nodes share this automation, and their
    # update sensors can change in the same poll. With restart, the second
    # node's trigger could cancel the first node's run before it finished.
    mode: queued
    trigger:
      - platform: state
        entity_id:
          - binary_sensor.node_proxmox1_updates_packages
          - binary_sensor.node_proxmox02_updates_packages
    variables:
      # 'proxmox1' is not a substring of 'proxmox02', so this check cleanly
      # separates the two nodes.
      node_name: >-
        {% if 'proxmox1' in trigger.entity_id %}Proxmox1{% else %}Proxmox02{% endif %}
      issue_id: >-
        {% if 'proxmox1' in trigger.entity_id %}
          proxmox1_updates_available
        {% else %}
          proxmox02_updates_available
        {% endif %}
    action:
      - choose:
          # to_state is none when the entity is removed; guard so the template
          # evaluates to false instead of raising.
          - conditions: "{{ trigger.to_state is not none and trigger.to_state.state == 'on' }}"
            sequence:
              - service: repairs.create
                data:
                  issue_id: "{{ issue_id }}"
                  severity: warning
                  persistent: false
                  title: "{{ node_name }} has updates available"
                  description: >
                    {{ trigger.entity_id }} is ON, indicating pending updates on {{ node_name }}.
                    Apply updates in Proxmox, then reload this sensor to clear the issue.
        default:
          # Matches the other Proxmox automations: a failed remove (for
          # example, no issue to clear) must not skip the logbook entry.
          - service: repairs.remove
            continue_on_error: true
            data:
              issue_id: "{{ issue_id }}"
          - service: script.send_to_logbook
            data:
              topic: "PROXMOX"
              message: "{{ node_name }} has been Patched"

  # Opens a Repairs issue and requests a Joanna investigation when a node's
  # runtime-healthy sensor has been 'off' for two minutes; clears the issue
  # when the sensor returns to 'on'.
  # NOTE(review): repairs.create / repairs.remove are not core services;
  # presumably provided by a custom integration (e.g. Spook) - confirm.
  - alias: "Proxmox Runtime Repair Issues"
    id: proxmox_runtime_repairs
    description: "Create and clear Repairs when Proxmox node runtime becomes unhealthy."
    # The two-minute hold is done by the trigger's per-entity `for:` timer
    # rather than an in-action delay under `mode: restart`. Previously a state
    # change on the other node during the delay restarted the run and silently
    # dropped the pending alert. queued lets every trigger run to completion.
    mode: queued
    trigger:
      - platform: state
        entity_id:
          - binary_sensor.proxmox1_runtime_healthy
          - binary_sensor.proxmox02_runtime_healthy
        to: "off"
        for: "00:02:00"
        id: degraded
      - platform: state
        entity_id:
          - binary_sensor.proxmox1_runtime_healthy
          - binary_sensor.proxmox02_runtime_healthy
        to: "on"
        id: recovered
    variables:
      # 'proxmox1' is not a substring of 'proxmox02', so this check cleanly
      # separates the two nodes.
      node_name: >-
        {% if 'proxmox1' in trigger.entity_id %}Proxmox1{% else %}Proxmox02{% endif %}
      issue_id: >-
        {% if 'proxmox1' in trigger.entity_id %}
          proxmox1_runtime_unhealthy
        {% else %}
          proxmox02_runtime_unhealthy
        {% endif %}
      runtime_entity: >-
        {% if 'proxmox1' in trigger.entity_id %}
          binary_sensor.proxmox1_runtime_healthy
        {% else %}
          binary_sensor.proxmox02_runtime_healthy
        {% endif %}
      # Report whichever upstream status entity currently has a usable state,
      # preferring the binary sensor over the text sensor.
      status_entity: >-
        {% if 'proxmox1' in trigger.entity_id %}
          {% if states('binary_sensor.node_proxmox1_status') not in ['unknown', 'unavailable', 'none', ''] %}
            binary_sensor.node_proxmox1_status
          {% else %}
            sensor.node_proxmox1_status
          {% endif %}
        {% else %}
          {% if states('binary_sensor.node_proxmox02_status') not in ['unknown', 'unavailable', 'none', ''] %}
            binary_sensor.node_proxmox02_status
          {% else %}
            sensor.node_proxmox02_status
          {% endif %}
        {% endif %}
      # Evaluated when the trigger fires, i.e. after the two-minute hold, so
      # the diagnostics reflect the status at alert time.
      status_value: "{{ states(status_entity) }}"
      trigger_context: "HA automation proxmox_runtime_repairs (Proxmox Runtime Repair Issues)"
    action:
      - choose:
          - conditions:
              - condition: trigger
                id: degraded
            sequence:
              - service: repairs.create
                data:
                  issue_id: "{{ issue_id }}"
                  severity: error
                  persistent: true
                  title: "{{ node_name }} runtime degraded"
                  description: >
                    {{ node_name }} has remained offline for over 2 minutes.
                    Check node status in Proxmox and restore runtime.
              - service: script.joanna_dispatch
                data:
                  trigger_context: "{{ trigger_context }}"
                  source: "home_assistant_automation.proxmox_runtime_repairs"
                  summary: "{{ node_name }} runtime has remained degraded for over 2 minutes"
                  entity_ids:
                    - "{{ runtime_entity }}"
                    - "{{ status_entity }}"
                  diagnostics: >-
                    issue_id={{ issue_id }},
                    node_name={{ node_name }},
                    runtime_entity={{ runtime_entity }},
                    status_entity={{ status_entity }},
                    status_value={{ status_value }},
                    unhealthy_for=2m
                  request: >-
                    Investigate {{ node_name }} runtime degradation and restore node availability if possible.
                    Check host status, cluster connectivity, storage reachability, and recent update activity first.
                    Do not reboot the host unless explicitly requested.
              - service: script.send_to_logbook
                data:
                  topic: "PROXMOX"
                  message: >-
                    {{ node_name }} runtime is degraded. Repair {{ issue_id }} opened and Joanna investigation requested.
        # Only the 'recovered' trigger reaches the default branch.
        default:
          - service: repairs.remove
            continue_on_error: true
            data:
              issue_id: "{{ issue_id }}"
          - service: script.send_to_logbook
            data:
              topic: "PROXMOX"
              message: "{{ node_name }} runtime recovered."

  # Tracks disk usage bands for both Proxmox nodes via the normalized
  # percentage sensors:
  #   warning   - value between 85 and 92 sustained for 15 minutes
  #   critical  - value above 92 (immediate)
  #   downgrade - critical -> warning band, re-files the issue as a warning
  #               without a new Joanna dispatch
  #   recovered - value below 85, issue removed
  # NOTE(review): repairs.create / repairs.remove are not core services;
  # presumably provided by a custom integration (e.g. Spook) - confirm.
  - alias: "Proxmox Disk Pressure Repair Issues"
    id: proxmox_disk_pressure_repairs
    description: "Create and clear Repairs when Proxmox node disk usage stays elevated."
    # queued instead of restart: the band_change state trigger fires on the
    # same state event as the critical/recovered numeric triggers. Under
    # restart, the later run could cancel the earlier one before its repair
    # create or Joanna dispatch completed. Queueing runs each to completion,
    # in trigger order.
    mode: queued
    trigger:
      - platform: numeric_state
        entity_id:
          - sensor.proxmox1_disk_used_percentage
          - sensor.proxmox02_disk_used_percentage
        above: 85
        below: 92
        for: "00:15:00"
        id: warning
      - platform: numeric_state
        entity_id:
          - sensor.proxmox1_disk_used_percentage
          - sensor.proxmox02_disk_used_percentage
        above: 92
        id: critical
      # Fires on every state change; used only to detect the critical ->
      # warning downgrade, which the numeric triggers above do not report
      # immediately.
      - platform: state
        entity_id:
          - sensor.proxmox1_disk_used_percentage
          - sensor.proxmox02_disk_used_percentage
        id: band_change
      - platform: numeric_state
        entity_id:
          - sensor.proxmox1_disk_used_percentage
          - sensor.proxmox02_disk_used_percentage
        below: 85
        id: recovered
    variables:
      # 'proxmox1' is not a substring of 'proxmox02', so this check cleanly
      # separates the two nodes.
      node_name: >-
        {% if 'proxmox1' in trigger.entity_id %}Proxmox1{% else %}Proxmox02{% endif %}
      issue_id: >-
        {% if 'proxmox1' in trigger.entity_id %}
          proxmox1_disk_pressure
        {% else %}
          proxmox02_disk_pressure
        {% endif %}
      disk_entity: "{{ trigger.entity_id }}"
      raw_disk_entity: >-
        {% if 'proxmox1' in trigger.entity_id %}
          sensor.node_proxmox1_disk_used_percentage
        {% else %}
          sensor.node_proxmox02_disk_used_percentage
        {% endif %}
      disk_pct: "{{ states(disk_entity) | float(0) }}"
      # Value before this trigger's state change; 0 when there was no usable
      # previous state.
      previous_disk_pct: >-
        {% if trigger.from_state is not none and trigger.from_state.state not in ['unknown', 'unavailable', 'none', ''] %}
          {{ trigger.from_state.state | float(0) }}
        {% else %}
          0
        {% endif %}
      previous_band: >-
        {% if previous_disk_pct >= 92 %}
          critical
        {% elif previous_disk_pct >= 85 %}
          warning
        {% else %}
          normal
        {% endif %}
    action:
      - choose:
          # Critical: file an error-level issue and dispatch Joanna.
          - conditions:
              - condition: trigger
                id: critical
            sequence:
              - service: repairs.create
                data:
                  issue_id: "{{ issue_id }}"
                  severity: error
                  persistent: true
                  title: "{{ node_name }} disk pressure critical ({{ disk_pct | round(1) }}%)"
                  description: >
                    {{ node_name }} disk usage is critically high.
                    Free disk space or expand storage allocation.
              - service: script.joanna_dispatch
                data:
                  trigger_context: "HA automation proxmox_disk_pressure_repairs (Proxmox Disk Pressure Repair Issues - Critical)"
                  source: "home_assistant_automation.proxmox_disk_pressure_repairs.critical"
                  summary: "{{ node_name }} disk pressure is critical at {{ disk_pct | round(1) }}%"
                  entity_ids:
                    - "{{ disk_entity }}"
                    - "{{ raw_disk_entity }}"
                  diagnostics: >-
                    issue_id={{ issue_id }},
                    node_name={{ node_name }},
                    disk_entity={{ disk_entity }},
                    raw_disk_entity={{ raw_disk_entity }},
                    disk_pct={{ disk_pct | round(1) }},
                    threshold=92
                  request: >-
                    Investigate critical disk pressure on {{ node_name }} and recommend safe remediation.
                    Check local storage usage, backups, logs, snapshots, and VM or container disk consumers first.
                    Do not delete VM disks or reboot the host unless explicitly requested.
              - service: script.send_to_logbook
                data:
                  topic: "PROXMOX"
                  message: >-
                    {{ node_name }} disk usage is critical at {{ disk_pct | round(1) }}%.
                    Repair {{ issue_id }} opened and Joanna investigation requested.
          # Sustained warning. Skipped when the node entered the band from
          # critical, because the band_change branch already re-filed the
          # issue as a warning.
          - conditions:
              - condition: trigger
                id: warning
              - condition: template
                value_template: "{{ previous_band != 'critical' }}"
            sequence:
              - service: repairs.create
                data:
                  issue_id: "{{ issue_id }}"
                  severity: warning
                  persistent: true
                  title: "{{ node_name }} disk pressure warning ({{ disk_pct | round(1) }}%)"
                  description: >
                    {{ node_name }} disk usage has stayed above 85% for 15 minutes.
                    Plan cleanup before capacity reaches critical levels.
              - service: script.joanna_dispatch
                data:
                  trigger_context: "HA automation proxmox_disk_pressure_repairs (Proxmox Disk Pressure Repair Issues - Warning)"
                  source: "home_assistant_automation.proxmox_disk_pressure_repairs.warning"
                  summary: "{{ node_name }} disk pressure warning at {{ disk_pct | round(1) }}%"
                  entity_ids:
                    - "{{ disk_entity }}"
                    - "{{ raw_disk_entity }}"
                  diagnostics: >-
                    issue_id={{ issue_id }},
                    node_name={{ node_name }},
                    disk_entity={{ disk_entity }},
                    raw_disk_entity={{ raw_disk_entity }},
                    disk_pct={{ disk_pct | round(1) }},
                    threshold=85,
                    sustained_for=15m
                  request: >-
                    Investigate elevated disk usage on {{ node_name }} and recommend safe cleanup actions before it becomes critical.
                    Check local storage usage, backups, logs, snapshots, and VM or container disk consumers first.
                    Do not delete VM disks or reboot the host unless explicitly requested.
              - service: script.send_to_logbook
                data:
                  topic: "PROXMOX"
                  message: >-
                    {{ node_name }} disk usage warning at {{ disk_pct | round(1) }}%.
                    Repair {{ issue_id }} opened and Joanna investigation requested.
          # Downgrade: previous value was critical and the current value sits
          # in the warning band. Re-file at warning severity, no new dispatch.
          - conditions:
              - condition: trigger
                id: band_change
              - condition: template
                value_template: "{{ previous_band == 'critical' and disk_pct >= 85 and disk_pct < 92 }}"
            sequence:
              - service: repairs.create
                data:
                  issue_id: "{{ issue_id }}"
                  severity: warning
                  persistent: true
                  title: "{{ node_name }} disk pressure warning ({{ disk_pct | round(1) }}%)"
                  description: >
                    {{ node_name }} disk usage is elevated but no longer critical.
                    Plan cleanup before capacity reaches critical levels again.
          # Recovery: usage dropped below 85, clear the issue.
          - conditions:
              - condition: trigger
                id: recovered
            sequence:
              - service: repairs.remove
                continue_on_error: true
                data:
                  issue_id: "{{ issue_id }}"