######################################################################
# @CCOSTAN - Follow Me on X
# For more info visit https://www.vcloudinfo.com/click-here
# Original Repo : https://github.com/CCOSTAN/Home-AssistantConfig
# -------------------------------------------------------------------
# Infrastructure Observability - Normalized infra monitoring signals
# WAN/DNS/website/domain/cert state normalized for dashboards.
# -------------------------------------------------------------------
# Related Issue: 1584
# Notes: Home dashboard consumes `infra_*` entities for exceptions-only alerts.
# Notes: Domain warning threshold is <30 days; critical threshold is <14 days.
# Notes: Nightly Duplicati verification is performed by codex_appliance against the Duplicati API because HA backup entities are not available.
######################################################################

# Shell-based probes. Each prints the literal "unknown" when no usable
# value could be extracted.
command_line:
  # Packet loss (%) over 10 pings to 1.1.1.1, polled every 5 minutes.
  # The summary field is selected by content ("packet loss") rather than
  # by position, because some ping builds insert extra comma-separated
  # fields (for example "+N errors") ahead of the loss percentage.
  - sensor:
      name: Infra WAN Packet Loss
      unique_id: infra_wan_packet_loss
      command: >-
        ping -q -c 10 -W 1 1.1.1.1 2>/dev/null |
        awk -F',' '/packet loss/ {for (i = 1; i <= NF; i++) if ($i ~ /packet loss/) {gsub(/[^0-9.]/, "", $i); print $i; found=1; break}} END {if (!found) print "unknown"}'
      scan_interval: 300
      unit_of_measurement: "%"
      value_template: "{{ (value | regex_replace('[^0-9.]', '')) or 'unknown' }}"

  # Average round-trip time (ms) over 10 pings to 1.1.1.1.
  # The values after " = " are split on "/" and the second one (avg) is
  # used, which holds for both the "min/avg/max/mdev" and the shorter
  # "min/avg/max" summary formats.
  - sensor:
      name: Infra WAN Latency Ms
      unique_id: infra_wan_latency_ms
      command: >-
        ping -q -c 10 -W 1 1.1.1.1 2>/dev/null |
        awk '/^rtt|^round-trip/ {split($0, halves, " = "); split(halves[2], stats, "/"); gsub(/[^0-9.]/, "", stats[2]); print stats[2]; found=1} END {if (!found) print "unknown"}'
      scan_interval: 300
      unit_of_measurement: "ms"
      value_template: "{{ (value | regex_replace('[^0-9.]', '')) or 'unknown' }}"

  # Public IP as reported by api.ipify.org; consumed by the
  # "Infra External IP" template sensor as its fallback source.
  - sensor:
      name: Infra External IP Fallback
      unique_id: infra_external_ip_fallback
      command: "curl -fsS https://api.ipify.org || echo unknown"
      scan_interval: 900

template:
  - sensor:
      # Prefers sensor.external_ip; falls back to the ipify probe when the
      # primary has no usable state.
      - name: "Infra External IP"
        unique_id: infra_external_ip
        state: >-
          {% set primary = states('sensor.external_ip') | trim %}
          {% set fallback = states('sensor.infra_external_ip_fallback') | trim %}
          {% if primary not in ['unknown', 'unavailable', 'none', ''] %}
            {{ primary }}
          {% else %}
            {{ fallback }}
          {% endif %}

      # Smallest days-until-expiration across the listed domain sensors.
      # Entities that do not exist or whose state is not numeric are
      # skipped; with no usable value the state renders as none.
      - name: "Infra Domain Expiry Min Days"
        unique_id: infra_domain_expiry_min_days
        unit_of_measurement: "d"
        state: >-
          {% set ids = [
            'sensor.vcloudinfo_com_days_until_expiration',
            'sensor.ipmer_com_days_until_expiration',
            'sensor.fordst_com_days_until_expiration',
            'sensor.kingcrafthomes_com_days_until_expiration'
          ] %}
          {% set ns = namespace(min=9999, any=false) %}
          {% for id in ids %}
            {% if expand(id) | count > 0 %}
              {% set raw = states(id) %}
              {% if raw not in ['unknown', 'unavailable', 'none', ''] %}
                {% set val = raw | float(9999) %}
                {# 9999 is the float() fallback: treat it as "not numeric",
                   the same way the cert-expiry sensor does. #}
                {% if val != 9999 %}
                  {% set ns.any = true %}
                  {% if val < ns.min %}
                    {% set ns.min = val %}
                  {% endif %}
                {% endif %}
              {% endif %}
            {% endif %}
          {% endfor %}
          {% if ns.any %}
            {{ ns.min | round(0) }}
          {% else %}
            {{ none }}
          {% endif %}

      # Smallest numeric state among sensors whose entity_id matches one of
      # the monitored domains followed by cert/ssl/tls.
      - name: "Infra Cert Expiry Min Days"
        unique_id: infra_cert_expiry_min_days
        unit_of_measurement: "d"
        state: >-
          {% set ns = namespace(min=9999, any=false) %}
          {% for item in states.sensor %}
            {% if item.entity_id is search('(vcloudinfo|ipmer|fordst|kingcrafthomes).*(cert|ssl|tls)') %}
              {% set raw = item.state %}
              {% if raw not in ['unknown', 'unavailable', 'none', ''] %}
                {% set value = raw | float(9999) %}
                {% if value != 9999 %}
                  {% set ns.any = true %}
                  {% if value < ns.min %}
                    {% set ns.min = value %}
                  {% endif %}
                {% endif %}
              {% endif %}
            {% endif %}
          {% endfor %}
          {% if ns.any %}
            {{ ns.min | round(0) }}
          {% else %}
            {{ none }}
          {% endif %}

      # Number of sensors matched by the cert/ssl/tls entity_id pattern,
      # regardless of their state.
      - name: "Infra Cert Telemetry Count"
        unique_id: infra_cert_telemetry_count
        icon: mdi:counter
        state: >-
          {% set ns = namespace(count=0) %}
          {% for item in states.sensor %}
            {% if item.entity_id is search('(vcloudinfo|ipmer|fordst|kingcrafthomes).*(cert|ssl|tls)') %}
              {% set ns.count = ns.count + 1 %}
            {% endif %}
          {% endfor %}
          {{ ns.count }}

      # Number of listed website binary sensors that exist and are
      # off/unknown/unavailable.
      - name: "Infra Website Down Count"
        unique_id: infra_website_down_count
        icon: mdi:counter
        state: >-
          {% set ids = [
            'binary_sensor.vcloudinfo_com',
            'binary_sensor.ipmer_com',
            'binary_sensor.fordst_com',
            'binary_sensor.www_kingcrafthomes_com'
          ] %}
          {% set ns = namespace(count=0) %}
          {% for id in ids %}
            {% if expand(id) | count > 0 %}
              {% set st = states(id) %}
              {% if st in ['off', 'unknown', 'unavailable'] %}
                {% set ns.count = ns.count + 1 %}
              {% endif %}
            {% endif %}
          {% endfor %}
          {{ ns.count }}

  - binary_sensor:
      # On when either WAN probe has no usable state, loss > 5 %, or
      # latency > 80 ms.
      - name: "Infra WAN Quality Degraded"
        unique_id: infra_wan_quality_degraded
        device_class: problem
        state: >-
          {% set loss_raw = states('sensor.infra_wan_packet_loss') %}
          {% set lat_raw = states('sensor.infra_wan_latency_ms') %}
          {% set invalid = loss_raw in ['unknown', 'unavailable', 'none', '']
             or lat_raw in ['unknown', 'unavailable', 'none', ''] %}
          {% set loss = loss_raw | float(0) %}
          {% set lat = lat_raw | float(0) %}
          {{ invalid or loss > 5 or lat > 80 }}

      # On when the Pi-hole switch is not 'on' or its status sensor is
      # off/unavailable/unknown.
      - name: "Infra DNS Pihole Degraded"
        unique_id: infra_dns_pihole_degraded
        device_class: problem
        state: >-
          {% set switch_state = states('switch.pi_hole') %}
          {% set service_state = states('binary_sensor.pihole_status') %}
          {{ switch_state != 'on' or service_state in ['off', 'unavailable', 'unknown'] }}

      # On when the upper-cased UPS status contains "OB".
      - name: "Infra UPS On Battery"
        unique_id: infra_ups_on_battery
        device_class: problem
        state: >-
          {% set status = states('sensor.garage_ups_status') | upper %}
          {{ 'OB' in status }}

      - name: "Infra Website Degraded"
        unique_id: infra_website_degraded
        device_class: problem
        state: >-
          {{ states('sensor.infra_website_down_count') | int(0) > 0 }}

      # On when at least one matching uptime_1d sensor has a usable state
      # below 99.
      - name: "Infra Website Uptime SLO Breach"
        unique_id: infra_website_uptime_slo_breach
        device_class: problem
        state: >-
          {% set ns = namespace(seen=false, breach=false) %}
          {% for item in states.sensor %}
            {% if item.entity_id is search('sensor\\.(vcloudinfo_com|kingcrafthomes_com|www_kingcrafthomes_com).*uptime_1d$') %}
              {% if item.state not in ['unknown', 'unavailable', 'none', ''] %}
                {% set ns.seen = true %}
                {% if (item.state | float(100)) < 99 %}
                  {% set ns.breach = true %}
                {% endif %}
              {% endif %}
            {% endif %}
          {% endfor %}
          {{ ns.seen and ns.breach }}

      # On when at least one matching avg_response_time_1d sensor has a
      # usable state above 1.2.
      - name: "Infra Website Latency Degraded"
        unique_id: infra_website_latency_degraded
        device_class: problem
        state: >-
          {% set ns = namespace(seen=false, breach=false) %}
          {% for item in states.sensor %}
            {% if item.entity_id is search('sensor\\.(vcloudinfo_com|kingcrafthomes_com|www_kingcrafthomes_com).*avg_response_time_1d$') %}
              {% if item.state not in ['unknown', 'unavailable', 'none', ''] %}
                {% set ns.seen = true %}
                {% if (item.state | float(0)) > 1.2 %}
                  {% set ns.breach = true %}
                {% endif %}
              {% endif %}
            {% endif %}
          {% endfor %}
          {{ ns.seen and ns.breach }}

      # Expiry thresholds: critical is < 14 days, warning is 14 <= days < 30.
      # All four sensors report false when the min-days sensor has no
      # usable state.
      - name: "Infra Domain Expiry Critical"
        unique_id: infra_domain_expiry_critical
        device_class: problem
        state: >-
          {% set d = states('sensor.infra_domain_expiry_min_days') %}
          {% if d in ['unknown', 'unavailable', 'none', ''] %}
            false
          {% else %}
            {{ d | float(9999) < 14 }}
          {% endif %}

      - name: "Infra Domain Expiry Warning"
        unique_id: infra_domain_expiry_warning
        device_class: problem
        state: >-
          {% set d = states('sensor.infra_domain_expiry_min_days') %}
          {% if d in ['unknown', 'unavailable', 'none', ''] %}
            false
          {% else %}
            {% set days = d | float(9999) %}
            {{ days < 30 and days >= 14 }}
          {% endif %}

      - name: "Infra Cert Expiry Critical"
        unique_id: infra_cert_expiry_critical
        device_class: problem
        state: >-
          {% set d = states('sensor.infra_cert_expiry_min_days') %}
          {% if d in ['unknown', 'unavailable', 'none', ''] %}
            false
          {% else %}
            {{ d | float(9999) < 14 }}
          {% endif %}

      - name: "Infra Cert Expiry Warning"
        unique_id: infra_cert_expiry_warning
        device_class: problem
        state: >-
          {% set d = states('sensor.infra_cert_expiry_min_days') %}
          {% if d in ['unknown', 'unavailable', 'none', ''] %}
            false
          {% else %}
            {% set days = d | float(9999) %}
            {{ days < 30 and days >= 14 }}
          {% endif %}

automation:
  - alias: "Infrastructure - External IP Change Logbook"
    id: infra_external_ip_change_logbook
    description: "Log external IP changes into the Activity feed."
    mode: queued
    trigger:
      - platform: state
        entity_id: sensor.infra_external_ip
    condition:
      # Both state objects are dereferenced by the next condition, so both
      # must exist.
      - condition: template
        value_template: "{{ trigger.from_state is not none and trigger.to_state is not none }}"
      - condition: template
        value_template: >-
          {{ trigger.from_state.state not in ['unknown', 'unavailable', 'none', '']
             and trigger.to_state.state not in ['unknown', 'unavailable', 'none', '']
             and trigger.from_state.state != trigger.to_state.state }}
    action:
      - service: script.send_to_logbook
        data:
          topic: "NETWORK"
          message: >-
            External IP changed from {{ trigger.from_state.state }}
            to {{ trigger.to_state.state }}.

  - alias: "Infrastructure - Website Uptime SLO Repair"
    id: infra_website_uptime_slo_repair
    description: "Create/clear Repairs issue when website 1-day uptime breaches SLO."
    mode: queued
    trigger:
      - platform: state
        entity_id: binary_sensor.infra_website_uptime_slo_breach
    action:
      - choose:
          - conditions: "{{ trigger.to_state.state == 'on' }}"
            sequence:
              - service: repairs.create
                data:
                  issue_id: infra_website_uptime_slo_breach
                  title: "Website uptime SLO breached"
                  description: >
                    At least one monitored website has uptime_1d below 99%.
                    Review Uptime Kuma entities on the Website Health dashboard.
                  severity: warning
                  persistent: true
        # Any state other than 'on' clears the issue.
        default:
          - service: repairs.remove
            continue_on_error: true
            data:
              issue_id: infra_website_uptime_slo_breach

  - alias: "Infrastructure - Website Latency Repair"
    id: infra_website_latency_repair
    description: "Create/clear Repairs issue when website response times degrade."
    mode: queued
    trigger:
      - platform: state
        entity_id: binary_sensor.infra_website_latency_degraded
    action:
      - choose:
          - conditions: "{{ trigger.to_state.state == 'on' }}"
            sequence:
              - service: repairs.create
                data:
                  issue_id: infra_website_latency_degraded
                  title: "Website latency degraded"
                  description: >
                    At least one monitored website reports avg_response_time_1d above 1.2s.
                    Review Uptime Kuma response-time entities on Website Health.
                  severity: warning
                  persistent: true
        # Any state other than 'on' clears the issue.
        default:
          - service: repairs.remove
            continue_on_error: true
            data:
              issue_id: infra_website_latency_degraded

  - alias: "Infrastructure - Backup Nightly Verification"
    id: infra_backup_nightly_verification
    description: "Use codex_appliance to verify the latest Duplicati run and dispatch Joanna only on failure."
    mode: single
    trigger:
      - platform: time
        at: "06:15:00"
    action:
      - variables:
          trigger_context: "HA automation infra_backup_nightly_verification (Infrastructure - Backup Nightly Verification)"
          duplicati_state: "{{ states('switch.duplicati_container') }}"
      # A failed verifier call must not abort the run: the failure branch
      # below is what raises the Repairs issue and dispatches Joanna. When
      # the call fails, `duplicati_verify` stays undefined, which the
      # `is defined` guards below treat as an empty response.
      - action: rest_command.bearclaw_duplicati_verify
        continue_on_error: true
        data:
          reason: "ha_nightly"
        response_variable: duplicati_verify
      - service: script.send_to_logbook
        data:
          topic: "BACKUP"
          message: >-
            {% set payload = duplicati_verify['content'] if duplicati_verify is defined and duplicati_verify is mapping and duplicati_verify['content'] is mapping else {} %}
            {% set detail = payload['detail'] if payload is mapping and payload['detail'] is mapping else {} %}
            {{ detail.get('summary', 'Nightly Duplicati verification completed.') }}
      # Normalize the response into plain variables. Every lookup falls
      # back to a default so a missing or malformed response resolves to
      # verify_healthy == false.
      - variables:
          verify_payload: "{{ duplicati_verify['content'] if duplicati_verify is defined and duplicati_verify is mapping and duplicati_verify['content'] is mapping else {} }}"
          verify_detail: "{{ verify_payload['detail'] if verify_payload is mapping and verify_payload['detail'] is mapping else {} }}"
          verify_http_status: "{{ duplicati_verify['status'] | int(0) if duplicati_verify is defined and duplicati_verify is mapping else 0 }}"
          verify_healthy: "{{ verify_payload.get('ok', false) and verify_detail.get('healthy', false) }}"
          verify_status: "{{ verify_detail.get('status', 'unknown') }}"
          verify_summary: "{{ verify_detail.get('summary', 'Duplicati verification did not return a summary.') }}"
          verify_issue: "{{ verify_detail.get('issue', verify_payload.get('error', 'duplicati_verify_failed')) }}"
          verify_backup_name: "{{ verify_detail.get('backupName', 'Docker_Configs') }}"
          # A present-but-null (or otherwise non-mapping) value is coerced
          # to {} so the `.get('endedAt', ...)` lookups below cannot fail.
          verify_latest_result: >-
            {% set latest = verify_detail.get('latestResult') if verify_detail is mapping else none %}
            {{ latest if latest is mapping else {} }}
          verify_last_success: >-
            {% set last_ok = verify_detail.get('lastSuccessfulRun') if verify_detail is mapping else none %}
            {{ last_ok if last_ok is mapping else {} }}
      - choose:
          - conditions: "{{ verify_healthy }}"
            sequence:
              - service: repairs.remove
                continue_on_error: true
                data:
                  issue_id: infra_duplicati_backup_failure
        default:
          - service: repairs.create
            data:
              issue_id: infra_duplicati_backup_failure
              title: "Duplicati nightly backup verification failed"
              description: >-
                {{ verify_summary }}
                Backup={{ verify_backup_name }};
                status={{ verify_status }};
                last_result={{ verify_latest_result.get('endedAt', 'n/a') }};
                last_success={{ verify_last_success.get('endedAt', 'n/a') }}.
              severity: error
              persistent: true
          - service: script.joanna_dispatch
            data:
              trigger_context: "{{ trigger_context }}"
              source: "home_assistant_automation.infra_backup_nightly_verification"
              summary: "Nightly Duplicati backup verification failed"
              entity_ids:
                - "switch.duplicati_container"
              diagnostics: >-
                scheduled_time=06:15:00,
                duplicati_container={{ duplicati_state }},
                verifier_http_status={{ verify_http_status }},
                verifier_status={{ verify_status }},
                verifier_issue={{ verify_issue }},
                backup_name={{ verify_backup_name }},
                latest_result={{ verify_latest_result.get('endedAt', 'n/a') }},
                last_success={{ verify_last_success.get('endedAt', 'n/a') }}
              request: >-
                Investigate the Duplicati backup job {{ verify_backup_name }}.
                The codex_appliance verifier reported status {{ verify_status }} with issue {{ verify_issue }}.
                Use the Duplicati API or UI directly, resolve the failure if possible, and verify a successful run before closing out.
                Reply with explicit status fields: resolved=true/false, backup_status, last_success_time, root_cause, action_taken, verification, next_action_required=true/false.