You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

457 lines
20 KiB

######################################################################
# @CCOSTAN - Follow Me on X
# For more info visit https://www.vcloudinfo.com/click-here
# Original Repo : https://github.com/CCOSTAN/Home-AssistantConfig
# -------------------------------------------------------------------
# Infrastructure - Observability and Joanna review workflows
# WAN/DNS/website/domain/cert state normalized for dashboards, plus scheduled infrastructure reviews.
# -------------------------------------------------------------------
# Related Issue: 1584
# Notes: Home dashboard consumes `infra_*` entities for exceptions-only alerts.
# Notes: Domain warning threshold is <30 days; critical threshold is <14 days.
# Notes: Nightly Duplicati verification is performed by codex_appliance against the Duplicati API because HA backup entities are not available.
# Notes: Monthly HA log hygiene review requests Telegram + GitHub issue follow-up only; Joanna must wait for approval before any changes.
# Notes: Numeric WAN telemetry exposes state_class so recorder can keep long-term statistics.
######################################################################
command_line:
# Packet loss (%) towards 1.1.1.1, measured with 10 probes every 5 minutes.
# The awk stage looks through every comma-separated field of ping's summary
# line for the one containing "packet loss" rather than assuming it is the
# third field: iputils ping inserts a "+N errors" field ahead of it when ICMP
# errors are returned, which made the fixed-position version report the error
# count as the loss percentage.
# NOTE(review): the text "unknown" is emitted when ping prints no summary line;
# confirm Home Assistant accepts that value for a sensor that declares a unit
# and state_class.
- sensor:
    name: Infra WAN Packet Loss
    unique_id: infra_wan_packet_loss
    command: >-
      ping -q -c 10 -W 1 1.1.1.1 2>/dev/null |
      awk -F',' '/packet loss/ {for (i = 1; i <= NF; i++) if ($i ~ /packet loss/)
      {gsub(/[^0-9.]/, "", $i); print $i; found=1; exit}}
      END {if (!found) print "unknown"}'
    scan_interval: 300
    unit_of_measurement: "%"
    state_class: measurement
    # Strip anything that is not a digit or dot; an empty result becomes "unknown".
    value_template: "{{ (value | regex_replace('[^0-9.]', '')) or 'unknown' }}"
# Average round-trip time (ms) to 1.1.1.1 over 10 probes, every 5 minutes.
# The summary line is split on " = " and the second slash-separated value is
# taken, which is the average in both the four-value form
# (rtt min/avg/max/mdev = a/b/c/d ms) and the three-value form
# (round-trip min/avg/max = a/b/c ms). Picking slash field 5 of the whole line
# only hit the average in the four-value form and returned the maximum in the
# three-value form.
- sensor:
    name: Infra WAN Latency Ms
    unique_id: infra_wan_latency_ms
    command: >-
      ping -q -c 10 -W 1 1.1.1.1 2>/dev/null |
      awk '/^rtt|^round-trip/ {split($0, halves, " = "); split(halves[2], rtt, "/");
      gsub(/[^0-9.]/, "", rtt[2]); print rtt[2]; found=1}
      END {if (!found) print "unknown"}'
    scan_interval: 300
    unit_of_measurement: "ms"
    state_class: measurement
    # Strip anything that is not a digit or dot; an empty result becomes "unknown".
    value_template: "{{ (value | regex_replace('[^0-9.]', '')) or 'unknown' }}"
# Independent external-IP probe against api.ipify.org, polled every 15 minutes.
# curl -f makes HTTP errors exit non-zero, so the `|| echo unknown` branch
# supplies the literal state "unknown" whenever the lookup fails.
- sensor:
    unique_id: infra_external_ip_fallback
    name: "Infra External IP Fallback"
    scan_interval: 900
    command: 'curl -fsS https://api.ipify.org || echo unknown'
template:
- sensor:
# Consolidated public IP: uses sensor.external_ip when it holds a usable value
# and otherwise reports whatever the command-line fallback sensor holds.
- name: "Infra External IP"
  unique_id: infra_external_ip
  state: >-
    {% set unusable = ['unknown', 'unavailable', 'none', ''] %}
    {% set preferred = states('sensor.external_ip') | trim %}
    {% set backup = states('sensor.infra_external_ip_fallback') | trim %}
    {{ preferred if preferred not in unusable else backup }}
# Smallest "days until expiration" across the monitored domains.
# Entities that expand() cannot resolve are skipped, and a reading only counts
# once it is confirmed numeric. Previously any non-placeholder text marked the
# sensor as having data and the 9999 sentinel was published as a day count.
# Renders none when no domain sensor supplies a numeric value.
- name: "Infra Domain Expiry Min Days"
  unique_id: infra_domain_expiry_min_days
  unit_of_measurement: "d"
  state: >-
    {% set ids = [
      'sensor.vcloudinfo_com_days_until_expiration',
      'sensor.ipmer_com_days_until_expiration',
      'sensor.fordst_com_days_until_expiration',
      'sensor.kingcrafthomes_com_days_until_expiration'
    ] %}
    {% set ns = namespace(min=9999, any=false) %}
    {% for id in ids %}
      {% if expand(id) | count > 0 %}
        {% set raw = states(id) %}
        {% if is_number(raw) %}
          {% set ns.any = true %}
          {% set val = raw | float(9999) %}
          {% if val < ns.min %}
            {% set ns.min = val %}
          {% endif %}
        {% endif %}
      {% endif %}
    {% endfor %}
    {% if ns.any %}
      {{ ns.min | round(0) }}
    {% else %}
      {{ none }}
    {% endif %}
# Smallest numeric state among sensors whose entity_id names one of the
# monitored domains followed by cert/ssl/tls. Placeholder states and values
# that fall back to the 9999 sentinel are ignored; renders none when nothing
# qualifies.
- name: "Infra Cert Expiry Min Days"
  unique_id: infra_cert_expiry_min_days
  unit_of_measurement: "d"
  state: >-
    {% set best = namespace(days=9999, found=false) %}
    {% for cert in states.sensor
       if cert.entity_id is search('(vcloudinfo|ipmer|fordst|kingcrafthomes).*(cert|ssl|tls)')
       and cert.state not in ['unknown', 'unavailable', 'none', ''] %}
      {% set days = cert.state | float(9999) %}
      {% if days != 9999 %}
        {% set best.found = true %}
        {% set best.days = [best.days, days] | min %}
      {% endif %}
    {% endfor %}
    {{ best.days | round(0) if best.found else none }}
# Number of sensor entities whose entity_id matches the certificate pattern
# (a monitored domain name followed by cert, ssl or tls), whatever their state.
- name: "Infra Cert Telemetry Count"
  unique_id: infra_cert_telemetry_count
  icon: mdi:counter
  state: >-
    {{ states.sensor
       | selectattr('entity_id', 'search', '(vcloudinfo|ipmer|fordst|kingcrafthomes).*(cert|ssl|tls)')
       | list
       | count }}
# Counts monitored website binary sensors that are off, unknown or unavailable.
# Entities that expand() cannot resolve are left out of the tally.
- name: "Infra Website Down Count"
  unique_id: infra_website_down_count
  icon: mdi:counter
  state: >-
    {% set monitors = [
      'binary_sensor.vcloudinfo_com',
      'binary_sensor.ipmer_com',
      'binary_sensor.fordst_com',
      'binary_sensor.www_kingcrafthomes_com'
    ] %}
    {% set down_states = ['off', 'unknown', 'unavailable'] %}
    {% set tally = namespace(down=0) %}
    {% for monitor in monitors %}
      {% if expand(monitor) | count > 0 and states(monitor) in down_states %}
        {% set tally.down = tally.down + 1 %}
      {% endif %}
    {% endfor %}
    {{ tally.down }}
- binary_sensor:
# Problem when either WAN probe has no usable reading, packet loss exceeds 5,
# or latency exceeds 80 (units as reported by the two command-line sensors).
- name: "Infra WAN Quality Degraded"
  unique_id: infra_wan_quality_degraded
  device_class: problem
  state: >-
    {% set missing = ['unknown', 'unavailable', 'none', ''] %}
    {% set loss_state = states('sensor.infra_wan_packet_loss') %}
    {% set latency_state = states('sensor.infra_wan_latency_ms') %}
    {{ loss_state in missing
       or latency_state in missing
       or loss_state | float(0) > 5
       or latency_state | float(0) > 80 }}
# Problem unless switch.pi_hole is on AND binary_sensor.pihole_status is
# neither off, unavailable nor unknown.
- name: "Infra DNS Pihole Degraded"
  unique_id: infra_dns_pihole_degraded
  device_class: problem
  state: >-
    {% set blocker_switch = states('switch.pi_hole') %}
    {% set blocker_status = states('binary_sensor.pihole_status') %}
    {{ not (blocker_switch == 'on'
            and blocker_status not in ['off', 'unavailable', 'unknown']) }}
# Problem when the upper-cased state of sensor.garage_ups_status contains "OB".
# NOTE(review): presumably the on-battery status flag; confirm against the
# UPS integration that provides this sensor.
- name: "Infra UPS On Battery"
  unique_id: infra_ups_on_battery
  device_class: problem
  state: >-
    {{ 'OB' in (states('sensor.garage_ups_status') | upper) }}
# Problem when the website down-count sensor reports at least one site.
# A non-numeric count is treated as 0.
- name: "Infra Website Degraded"
  unique_id: infra_website_degraded
  device_class: problem
  state: >-
    {% set down_sites = states('sensor.infra_website_down_count') | int(0) %}
    {{ down_sites > 0 }}
# Problem when any matching uptime_1d sensor holds a usable reading below 99.
# Placeholder states are ignored and non-numeric readings count as 100, so
# neither can trigger a breach.
- name: "Infra Website Uptime SLO Breach"
  unique_id: infra_website_uptime_slo_breach
  device_class: problem
  state: >-
    {% set below_slo = states.sensor
       | selectattr('entity_id', 'search', 'sensor\\.(vcloudinfo_com|kingcrafthomes_com|www_kingcrafthomes_com).*uptime_1d$')
       | map(attribute='state')
       | reject('in', ['unknown', 'unavailable', 'none', ''])
       | map('float', 100)
       | select('lt', 99)
       | list %}
    {{ below_slo | count > 0 }}
# Problem when any matching avg_response_time_1d sensor holds a usable reading
# above 1.2. Placeholder states are ignored and non-numeric readings count as 0.
- name: "Infra Website Latency Degraded"
  unique_id: infra_website_latency_degraded
  device_class: problem
  state: >-
    {% set slow = namespace(found=false) %}
    {% for probe in states.sensor
       if probe.entity_id is search('sensor\\.(vcloudinfo_com|kingcrafthomes_com|www_kingcrafthomes_com).*avg_response_time_1d$')
       and probe.state not in ['unknown', 'unavailable', 'none', '']
       and probe.state | float(0) > 1.2 %}
      {% set slow.found = true %}
    {% endfor %}
    {{ slow.found }}
# Problem when the minimum domain expiry is a usable value below 14 days.
# Placeholder states never raise the alert.
- name: "Infra Domain Expiry Critical"
  unique_id: infra_domain_expiry_critical
  device_class: problem
  state: >-
    {% set days_left = states('sensor.infra_domain_expiry_min_days') %}
    {{ days_left not in ['unknown', 'unavailable', 'none', '']
       and days_left | float(9999) < 14 }}
# Problem when the minimum domain expiry is a usable value of at least 14 and
# below 30 days (the band above the critical threshold).
- name: "Infra Domain Expiry Warning"
  unique_id: infra_domain_expiry_warning
  device_class: problem
  state: >-
    {% set reading = states('sensor.infra_domain_expiry_min_days') %}
    {% set remaining = reading | float(9999) %}
    {{ reading not in ['unknown', 'unavailable', 'none', '']
       and remaining >= 14
       and remaining < 30 }}
# Problem when the minimum certificate expiry is a usable value below 14 days.
# Placeholder states never raise the alert.
- name: "Infra Cert Expiry Critical"
  unique_id: infra_cert_expiry_critical
  device_class: problem
  state: >-
    {% set days_left = states('sensor.infra_cert_expiry_min_days') %}
    {{ days_left not in ['unknown', 'unavailable', 'none', '']
       and days_left | float(9999) < 14 }}
# Problem when the minimum certificate expiry is a usable value of at least 14
# and below 30 days (the band above the critical threshold).
- name: "Infra Cert Expiry Warning"
  unique_id: infra_cert_expiry_warning
  device_class: problem
  state: >-
    {% set reading = states('sensor.infra_cert_expiry_min_days') %}
    {% set remaining = reading | float(9999) %}
    {{ reading not in ['unknown', 'unavailable', 'none', '']
       and remaining >= 14
       and remaining < 30 }}
automation:
# Calls script.send_to_logbook whenever sensor.infra_external_ip moves from one
# usable value to a different usable value.
- id: infra_external_ip_change_logbook
  alias: "Infrastructure - External IP Change Logbook"
  description: "Log external IP changes into the Activity feed."
  mode: queued
  trigger:
    - platform: state
      entity_id: sensor.infra_external_ip
  condition:
    # Checked first because the comparison below dereferences from_state.
    - condition: template
      value_template: "{{ trigger.from_state is not none }}"
    # Both ends must be real values and must actually differ.
    - condition: template
      value_template: >-
        {% set placeholders = ['unknown', 'unavailable', 'none', ''] %}
        {% set old_ip = trigger.from_state.state %}
        {% set new_ip = trigger.to_state.state %}
        {{ old_ip not in placeholders and new_ip not in placeholders and old_ip != new_ip }}
  action:
    - service: script.send_to_logbook
      data:
        topic: "NETWORK"
        message: >-
          External IP changed from {{ trigger.from_state.state }} to {{ trigger.to_state.state }}.
# Mirrors binary_sensor.infra_website_uptime_slo_breach into a Repairs issue:
# created when the sensor turns on, removed on any other state change.
- id: infra_website_uptime_slo_repair
  alias: "Infrastructure - Website Uptime SLO Repair"
  description: "Create/clear Repairs issue when website 1-day uptime breaches SLO."
  mode: queued
  trigger:
    - platform: state
      entity_id: binary_sensor.infra_website_uptime_slo_breach
  action:
    - if:
        - "{{ trigger.to_state.state == 'on' }}"
      then:
        - service: repairs.create
          data:
            issue_id: infra_website_uptime_slo_breach
            title: "Website uptime SLO breached"
            description: >
              At least one monitored website has uptime_1d below 99%.
              Review Uptime Kuma entities on the Website Health dashboard.
            severity: warning
            persistent: true
      else:
        # A failure of the remove call does not stop the run.
        - service: repairs.remove
          continue_on_error: true
          data:
            issue_id: infra_website_uptime_slo_breach
# Mirrors binary_sensor.infra_website_latency_degraded into a Repairs issue:
# created when the sensor turns on, removed on any other state change.
- id: infra_website_latency_repair
  alias: "Infrastructure - Website Latency Repair"
  description: "Create/clear Repairs issue when website response times degrade."
  mode: queued
  trigger:
    - platform: state
      entity_id: binary_sensor.infra_website_latency_degraded
  action:
    - if:
        - "{{ trigger.to_state.state == 'on' }}"
      then:
        - service: repairs.create
          data:
            issue_id: infra_website_latency_degraded
            title: "Website latency degraded"
            description: >
              At least one monitored website reports avg_response_time_1d above 1.2s.
              Review Uptime Kuma response-time entities on Website Health.
            severity: warning
            persistent: true
      else:
        # A failure of the remove call does not stop the run.
        - service: repairs.remove
          continue_on_error: true
          data:
            issue_id: infra_website_latency_degraded
# Nightly 06:15 Duplicati check.
# Flow: call rest_command.bearclaw_duplicati_verify, log the returned summary,
# normalise the response into verify_* variables, then either clear the
# Repairs issue (healthy) or create it and dispatch Joanna (anything else).
# The verifier call carries continue_on_error and every read of its response
# is guarded with `is defined`, so a call that errors out before producing a
# response reaches the failure branch instead of ending the run with no
# Repairs issue and no dispatch.
- alias: "Infrastructure - Backup Nightly Verification"
  id: infra_backup_nightly_verification
  description: "Use codex_appliance to verify the latest Duplicati run and dispatch Joanna only on failure."
  mode: single
  trigger:
    - platform: time
      at: "06:15:00"
  action:
    - variables:
        trigger_context: "HA automation infra_backup_nightly_verification (Infrastructure - Backup Nightly Verification)"
        duplicati_state: "{{ states('switch.duplicati_container') }}"
    # If this call errors, duplicati_verify is left undefined; the templates
    # below then fall back to empty mappings and HTTP status 0.
    - action: rest_command.bearclaw_duplicati_verify
      continue_on_error: true
      data:
        reason: "ha_nightly"
      response_variable: duplicati_verify
    - service: script.send_to_logbook
      data:
        topic: "BACKUP"
        message: >-
          {% set response = duplicati_verify if duplicati_verify is defined and duplicati_verify is mapping else {} %}
          {% set payload = response['content'] if response['content'] is mapping else {} %}
          {% set detail = payload['detail'] if payload['detail'] is mapping else {} %}
          {{ detail.get('summary', 'Nightly Duplicati verification completed.' if response else 'Nightly Duplicati verification got no response from the verifier.') }}
    - variables:
        verify_payload: "{{ duplicati_verify['content'] if duplicati_verify is defined and duplicati_verify is mapping and duplicati_verify['content'] is mapping else {} }}"
        verify_detail: "{{ verify_payload['detail'] if verify_payload is mapping and verify_payload['detail'] is mapping else {} }}"
        verify_http_status: "{{ duplicati_verify['status'] | int(0) if duplicati_verify is defined and duplicati_verify is mapping else 0 }}"
        # Healthy only when both the payload's ok flag and the detail's healthy flag are set.
        verify_healthy: "{{ verify_payload.get('ok', false) and verify_detail.get('healthy', false) }}"
        verify_status: "{{ verify_detail.get('status', 'unknown') }}"
        verify_summary: "{{ verify_detail.get('summary', 'Duplicati verification did not return a summary.') }}"
        verify_issue: "{{ verify_detail.get('issue', verify_payload.get('error', 'duplicati_verify_failed')) }}"
        verify_backup_name: "{{ verify_detail.get('backupName', 'Docker_Configs') }}"
        verify_latest_result: "{{ verify_detail.get('latestResult', {}) if verify_detail is mapping else {} }}"
        verify_last_success: "{{ verify_detail.get('lastSuccessfulRun', {}) if verify_detail is mapping else {} }}"
    - choose:
        - conditions: "{{ verify_healthy }}"
          sequence:
            - service: repairs.remove
              continue_on_error: true
              data:
                issue_id: infra_duplicati_backup_failure
      default:
        - service: repairs.create
          data:
            issue_id: infra_duplicati_backup_failure
            title: "Duplicati nightly backup verification failed"
            description: >-
              {{ verify_summary }}
              Backup={{ verify_backup_name }};
              status={{ verify_status }};
              last_result={{ verify_latest_result.get('endedAt', 'n/a') }};
              last_success={{ verify_last_success.get('endedAt', 'n/a') }}.
            severity: error
            persistent: true
        - service: script.joanna_dispatch
          data:
            trigger_context: "{{ trigger_context }}"
            source: "home_assistant_automation.infra_backup_nightly_verification"
            summary: "Nightly Duplicati backup verification failed"
            entity_ids:
              - "switch.duplicati_container"
            diagnostics: >-
              scheduled_time=06:15:00,
              duplicati_container={{ duplicati_state }},
              verifier_http_status={{ verify_http_status }},
              verifier_status={{ verify_status }},
              verifier_issue={{ verify_issue }},
              backup_name={{ verify_backup_name }},
              latest_result={{ verify_latest_result.get('endedAt', 'n/a') }},
              last_success={{ verify_last_success.get('endedAt', 'n/a') }}
            request: >-
              Investigate the Duplicati backup job {{ verify_backup_name }}.
              The codex_appliance verifier reported status {{ verify_status }} with issue {{ verify_issue }}.
              Use the Duplicati API or UI directly, resolve the failure if possible, and verify a successful run before closing out.
              Reply with explicit status fields:
              resolved=true/false,
              backup_status,
              last_success_time,
              root_cause,
              action_taken,
              verification,
              next_action_required=true/false.
# Fires daily at 03:20 but only proceeds on the 1st of the month. It hands
# Joanna a recommendations-only log review request via script.joanna_dispatch
# and then records the dispatch through script.send_to_logbook.
- id: infra_monthly_log_hygiene_review
  alias: "Infrastructure - Monthly HA Log Hygiene Review"
  description: "Ask Joanna monthly to review Home Assistant logs, create a GitHub issue with noisy entries, and send Telegram recommendations only."
  mode: single
  variables:
    trigger_context: "HA automation infra_monthly_log_hygiene_review (Infrastructure - Monthly HA Log Hygiene Review)"
  trigger:
    - platform: time
      at: "03:20:00"
  condition:
    # The time trigger has no day-of-month option here, so the gate is a template.
    - condition: template
      value_template: "{{ now().day == 1 }}"
  action:
    - service: script.joanna_dispatch
      data:
        source: "home_assistant_automation.infra_monthly_log_hygiene_review"
        trigger_context: "{{ trigger_context }}"
        summary: "Monthly Home Assistant log hygiene review with GitHub issue and Telegram follow-up"
        diagnostics: >-
          schedule=day_1@03:20:00,
          review_scope=available_home_assistant_logs,
          desired_outputs=telegram_follow_up+github_issue,
          github_repo=CCOSTAN/Home-AssistantConfig,
          approval_required_before_changes=true
        request: >-
          Review the available Home Assistant log files from the last month and identify noisy,
          low-value entries that could be safely suppressed, filtered, slowed, deduplicated, or
          retired. Focus on practical Home Assistant-side changes such as recorder exclusions,
          logger filtering, scan-interval reductions, entity retirement, or automation de-noising.
          Create or refresh a GitHub issue in CCOSTAN/Home-AssistantConfig that captures the noisy
          entries, estimated frequency, why each candidate is low-value, and the exact repo files
          or integrations likely to change. Then send Carlo a concise Telegram summary with the top
          recommendations and the GitHub issue number or link. Do not make any changes from this
          review. Wait for explicit follow-up approval first.
    - service: script.send_to_logbook
      data:
        topic: "HOME ASSISTANT"
        message: "Joanna monthly Home Assistant log hygiene review dispatched; Telegram summary and GitHub issue requested."

Powered by TurnKey Linux.