You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
457 lines
20 KiB
457 lines
20 KiB
######################################################################
|
|
# @CCOSTAN - Follow Me on X
|
|
# For more info visit https://www.vcloudinfo.com/click-here
|
|
# Original Repo : https://github.com/CCOSTAN/Home-AssistantConfig
|
|
# -------------------------------------------------------------------
|
|
# Infrastructure - Observability and Joanna review workflows
|
|
# WAN/DNS/website/domain/cert state normalized for dashboards, plus scheduled infrastructure reviews.
|
|
# -------------------------------------------------------------------
|
|
# Related Issue: 1584
|
|
# Notes: Home dashboard consumes `infra_*` entities for exceptions-only alerts.
|
|
# Notes: Domain warning threshold is <30 days; critical threshold is <14 days.
|
|
# Notes: Nightly Duplicati verification is performed by codex_appliance against the Duplicati API because HA backup entities are not available.
|
|
# Notes: Monthly HA log hygiene review requests Telegram + GitHub issue follow-up only; Joanna must wait for approval before any changes.
|
|
# Notes: Numeric WAN telemetry exposes state_class so recorder can keep long-term statistics.
|
|
######################################################################
|
|
|
|
command_line:
|
|
# Infra WAN Packet Loss: percentage of ICMP echoes lost over 10 pings to
# 1.1.1.1, sampled every 300 s.
# The awk program looks for the comma-separated field that actually contains
# "packet loss" instead of assuming it is always the third field: iputils
# inserts a "+N errors" field ahead of it when ICMP errors come back, which
# would otherwise make the sensor report the error count.
# Prints "unknown" when ping produces no summary line.
- sensor:
    name: Infra WAN Packet Loss
    unique_id: infra_wan_packet_loss
    command: >-
      ping -q -c 10 -W 1 1.1.1.1 2>/dev/null |
      awk -F',' '/packet loss/ {for (i = 1; i <= NF; i++) if ($i ~ /packet loss/)
      {gsub(/[^0-9.]/, "", $i); print $i; found=1; break}}
      END {if (!found) print "unknown"}'
    scan_interval: 300
    unit_of_measurement: "%"
    state_class: measurement
    # Keep only digits and dots; an empty result falls back to 'unknown'.
    value_template: "{{ (value | regex_replace('[^0-9.]', '')) or 'unknown' }}"
|
|
|
|
# Infra WAN Latency Ms: average round-trip time over 10 pings to 1.1.1.1,
# sampled every 300 s.
# The awk program drops everything up to the "=" of the rtt/round-trip summary
# line and takes the second "/"-separated value, which is the average for both
# four-statistic summaries (min/avg/max/mdev) and three-statistic summaries
# (min/avg/max). Counting "/" fields across the whole line only worked for the
# four-statistic form.
# Prints "unknown" when no summary line is produced (e.g. 100% loss).
- sensor:
    name: Infra WAN Latency Ms
    unique_id: infra_wan_latency_ms
    command: >-
      ping -q -c 10 -W 1 1.1.1.1 2>/dev/null |
      awk '/^rtt|^round-trip/ {sub(/^.*= */, ""); split($0, t, "/");
      gsub(/[^0-9.]/, "", t[2]); print t[2]; found=1}
      END {if (!found) print "unknown"}'
    scan_interval: 300
    unit_of_measurement: "ms"
    state_class: measurement
    # Keep only digits and dots; an empty result falls back to 'unknown'.
    value_template: "{{ (value | regex_replace('[^0-9.]', '')) or 'unknown' }}"
|
|
|
|
# Infra External IP Fallback: asks api.ipify.org for the public IP every
# 900 s; the shell fallback emits the literal text "unknown" when curl fails.
- sensor:
    unique_id: infra_external_ip_fallback
    name: Infra External IP Fallback
    scan_interval: 900
    command: 'curl -fsS https://api.ipify.org || echo unknown'
|
|
|
|
template:
|
|
- sensor:
|
|
# Infra External IP: reports sensor.external_ip when it holds a usable value,
# otherwise whatever sensor.infra_external_ip_fallback currently reports.
- name: "Infra External IP"
  unique_id: infra_external_ip
  state: >-
    {% set wan_ip = states('sensor.external_ip') | trim %}
    {% set backup_ip = states('sensor.infra_external_ip_fallback') | trim %}
    {{ backup_ip if wan_ip in ['unknown', 'unavailable', 'none', ''] else wan_ip }}
|
|
|
|
# Infra Domain Expiry Min Days: smallest "days until expiration" value across
# the listed domain sensors. Entities that do not exist, hold a placeholder
# state, or hold a non-numeric state are skipped. Renders none when no usable
# reading was found.
- name: "Infra Domain Expiry Min Days"
  unique_id: infra_domain_expiry_min_days
  unit_of_measurement: "d"
  state: >-
    {% set ids = [
      'sensor.vcloudinfo_com_days_until_expiration',
      'sensor.ipmer_com_days_until_expiration',
      'sensor.fordst_com_days_until_expiration',
      'sensor.kingcrafthomes_com_days_until_expiration'
    ] %}
    {% set ns = namespace(min=9999, any=false) %}
    {% for id in ids %}
      {# expand() yields nothing for an entity that does not exist. #}
      {% if expand(id) | count > 0 %}
        {% set raw = states(id) %}
        {% if raw not in ['unknown', 'unavailable', 'none', ''] %}
          {% set val = raw | float(9999) %}
          {# 9999 is the parse-failure sentinel; only a parsed value may mark
             the result as valid, otherwise the sentinel itself is published. #}
          {% if val != 9999 %}
            {% set ns.any = true %}
            {% if val < ns.min %}
              {% set ns.min = val %}
            {% endif %}
          {% endif %}
        {% endif %}
      {% endif %}
    {% endfor %}
    {% if ns.any %}
      {{ ns.min | round(0) }}
    {% else %}
      {{ none }}
    {% endif %}
|
|
|
|
# Infra Cert Expiry Min Days: smallest numeric state among all sensor entities
# whose entity_id matches one of the monitored domains followed by
# cert/ssl/tls. Renders none when no matching sensor has a numeric state.
- name: "Infra Cert Expiry Min Days"
  unique_id: infra_cert_expiry_min_days
  unit_of_measurement: "d"
  state: >-
    {% set best = namespace(min=9999, any=false) %}
    {% for sensor in states.sensor %}
      {% if sensor.entity_id is search('(vcloudinfo|ipmer|fordst|kingcrafthomes).*(cert|ssl|tls)')
            and sensor.state not in ['unknown', 'unavailable', 'none', ''] %}
        {# 9999 doubles as the "could not parse" marker and is ignored. #}
        {% set days = sensor.state | float(9999) %}
        {% if days != 9999 %}
          {% set best.any = true %}
          {% if days < best.min %}
            {% set best.min = days %}
          {% endif %}
        {% endif %}
      {% endif %}
    {% endfor %}
    {% if best.any %}
      {{ best.min | round(0) }}
    {% else %}
      {{ none }}
    {% endif %}
|
|
|
|
# Infra Cert Telemetry Count: number of sensor entities whose entity_id matches
# one of the monitored domains followed by cert/ssl/tls, regardless of state.
- name: "Infra Cert Telemetry Count"
  unique_id: infra_cert_telemetry_count
  icon: mdi:counter
  state: >-
    {{ states.sensor
       | map(attribute='entity_id')
       | select('search', '(vcloudinfo|ipmer|fordst|kingcrafthomes).*(cert|ssl|tls)')
       | list
       | count }}
|
|
|
|
# Infra Website Down Count: how many of the listed website binary sensors exist
# and are currently off, unknown, or unavailable.
- name: "Infra Website Down Count"
  unique_id: infra_website_down_count
  icon: mdi:counter
  state: >-
    {% set monitors = [
      'binary_sensor.vcloudinfo_com',
      'binary_sensor.ipmer_com',
      'binary_sensor.fordst_com',
      'binary_sensor.www_kingcrafthomes_com'
    ] %}
    {% set tally = namespace(count=0) %}
    {% for monitor in monitors %}
      {# Entities that do not exist are skipped, not counted as down. #}
      {% if expand(monitor) | count > 0
            and states(monitor) in ['off', 'unknown', 'unavailable'] %}
        {% set tally.count = tally.count + 1 %}
      {% endif %}
    {% endfor %}
    {{ tally.count }}
|
|
|
|
- binary_sensor:
|
|
# Infra WAN Quality Degraded: on when either WAN reading is missing, packet
# loss exceeds 5, or latency exceeds 80.
- name: "Infra WAN Quality Degraded"
  unique_id: infra_wan_quality_degraded
  device_class: problem
  state: >-
    {% set missing = ['unknown', 'unavailable', 'none', ''] %}
    {% set loss_state = states('sensor.infra_wan_packet_loss') %}
    {% set latency_state = states('sensor.infra_wan_latency_ms') %}
    {% if loss_state in missing or latency_state in missing %}
      {{ true }}
    {% else %}
      {{ loss_state | float(0) > 5 or latency_state | float(0) > 80 }}
    {% endif %}
|
|
|
|
# Infra DNS Pihole Degraded: off only while switch.pi_hole is 'on' and
# binary_sensor.pihole_status is not off/unavailable/unknown.
- name: "Infra DNS Pihole Degraded"
  unique_id: infra_dns_pihole_degraded
  device_class: problem
  state: >-
    {% set blocking = states('switch.pi_hole') %}
    {% set daemon = states('binary_sensor.pihole_status') %}
    {{ not (blocking == 'on' and daemon not in ['off', 'unavailable', 'unknown']) }}
|
|
|
|
# Infra UPS On Battery: on when the upper-cased state of
# sensor.garage_ups_status contains the text 'OB'.
- name: "Infra UPS On Battery"
  unique_id: infra_ups_on_battery
  device_class: problem
  state: >-
    {{ 'OB' in (states('sensor.garage_ups_status') | upper) }}
|
|
|
|
# Infra Website Degraded: on when sensor.infra_website_down_count reports at
# least one site down; a non-numeric count is treated as 0.
- name: "Infra Website Degraded"
  unique_id: infra_website_degraded
  device_class: problem
  state: >-
    {% set down = states('sensor.infra_website_down_count') | int(0) %}
    {{ down >= 1 }}
|
|
|
|
# Infra Website Uptime SLO Breach: on when any matching *uptime_1d sensor with
# a usable state reports a value below 99.
- name: "Infra Website Uptime SLO Breach"
  unique_id: infra_website_uptime_slo_breach
  device_class: problem
  state: >-
    {% set flags = namespace(seen=false, breach=false) %}
    {% for sensor in states.sensor %}
      {% if sensor.entity_id is search('sensor\\.(vcloudinfo_com|kingcrafthomes_com|www_kingcrafthomes_com).*uptime_1d$')
            and sensor.state not in ['unknown', 'unavailable', 'none', ''] %}
        {% set flags.seen = true %}
        {# An unparsable state defaults to 100, so it never counts as a breach. #}
        {% if sensor.state | float(100) < 99 %}
          {% set flags.breach = true %}
        {% endif %}
      {% endif %}
    {% endfor %}
    {{ flags.seen and flags.breach }}
|
|
|
|
# Infra Website Latency Degraded: on when any matching *avg_response_time_1d
# sensor with a usable state reports a value above 1.2.
- name: "Infra Website Latency Degraded"
  unique_id: infra_website_latency_degraded
  device_class: problem
  state: >-
    {% set flags = namespace(seen=false, breach=false) %}
    {% for sensor in states.sensor %}
      {% if sensor.entity_id is search('sensor\\.(vcloudinfo_com|kingcrafthomes_com|www_kingcrafthomes_com).*avg_response_time_1d$')
            and sensor.state not in ['unknown', 'unavailable', 'none', ''] %}
        {% set flags.seen = true %}
        {# An unparsable state defaults to 0, so it never counts as a breach. #}
        {% if sensor.state | float(0) > 1.2 %}
          {% set flags.breach = true %}
        {% endif %}
      {% endif %}
    {% endfor %}
    {{ flags.seen and flags.breach }}
|
|
|
|
# Infra Domain Expiry Critical: on when the minimum domain expiry is known and
# below 14 days; off when the reading is missing.
- name: "Infra Domain Expiry Critical"
  unique_id: infra_domain_expiry_critical
  device_class: problem
  state: >-
    {% set days_left = states('sensor.infra_domain_expiry_min_days') %}
    {{ days_left not in ['unknown', 'unavailable', 'none', '']
       and days_left | float(9999) < 14 }}
|
|
|
|
# Infra Domain Expiry Warning: on when the minimum domain expiry is known and
# falls in the 14 (inclusive) to 30 (exclusive) day window.
- name: "Infra Domain Expiry Warning"
  unique_id: infra_domain_expiry_warning
  device_class: problem
  state: >-
    {% set reading = states('sensor.infra_domain_expiry_min_days') %}
    {% set days_left = reading | float(9999) %}
    {{ reading not in ['unknown', 'unavailable', 'none', '']
       and days_left >= 14 and days_left < 30 }}
|
|
|
|
# Infra Cert Expiry Critical: on when the minimum certificate expiry is known
# and below 14 days; off when the reading is missing.
- name: "Infra Cert Expiry Critical"
  unique_id: infra_cert_expiry_critical
  device_class: problem
  state: >-
    {% set days_left = states('sensor.infra_cert_expiry_min_days') %}
    {{ days_left not in ['unknown', 'unavailable', 'none', '']
       and days_left | float(9999) < 14 }}
|
|
|
|
# Infra Cert Expiry Warning: on when the minimum certificate expiry is known
# and falls in the 14 (inclusive) to 30 (exclusive) day window.
- name: "Infra Cert Expiry Warning"
  unique_id: infra_cert_expiry_warning
  device_class: problem
  state: >-
    {% set reading = states('sensor.infra_cert_expiry_min_days') %}
    {% set days_left = reading | float(9999) %}
    {{ reading not in ['unknown', 'unavailable', 'none', '']
       and days_left >= 14 and days_left < 30 }}
|
|
|
|
automation:
|
|
# Writes a NETWORK logbook entry whenever sensor.infra_external_ip moves from
# one usable value to a different usable value.
- alias: "Infrastructure - External IP Change Logbook"
  id: infra_external_ip_change_logbook
  description: "Log external IP changes into the Activity feed."
  mode: queued
  trigger:
    - platform: state
      entity_id: sensor.infra_external_ip
  condition:
    # Both state objects must exist before their .state is read; to_state was
    # previously dereferenced without a guard.
    - condition: template
      value_template: "{{ trigger.from_state is not none and trigger.to_state is not none }}"
    # Ignore placeholder states and updates that do not change the value.
    - condition: template
      value_template: >-
        {{ trigger.from_state.state not in ['unknown', 'unavailable', 'none', ''] and
        trigger.to_state.state not in ['unknown', 'unavailable', 'none', ''] and
        trigger.from_state.state != trigger.to_state.state }}
  action:
    - service: script.send_to_logbook
      data:
        topic: "NETWORK"
        message: >-
          External IP changed from {{ trigger.from_state.state }} to {{ trigger.to_state.state }}.
|
|
|
|
# Mirrors binary_sensor.infra_website_uptime_slo_breach into a Repairs issue:
# created when the sensor turns on, removed on any other state.
- alias: "Infrastructure - Website Uptime SLO Repair"
  id: infra_website_uptime_slo_repair
  description: "Create/clear Repairs issue when website 1-day uptime breaches SLO."
  mode: queued
  trigger:
    - platform: state
      entity_id: binary_sensor.infra_website_uptime_slo_breach
  action:
    - if:
        - condition: template
          value_template: "{{ trigger.to_state.state == 'on' }}"
      then:
        - service: repairs.create
          data:
            issue_id: infra_website_uptime_slo_breach
            title: "Website uptime SLO breached"
            description: >
              At least one monitored website has uptime_1d below 99%.
              Review Uptime Kuma entities on the Website Health dashboard.
            severity: warning
            persistent: true
      else:
        # Removal is best-effort: the run continues if this call errors.
        - service: repairs.remove
          continue_on_error: true
          data:
            issue_id: infra_website_uptime_slo_breach
|
|
|
|
# Mirrors binary_sensor.infra_website_latency_degraded into a Repairs issue:
# created when the sensor turns on, removed on any other state.
- alias: "Infrastructure - Website Latency Repair"
  id: infra_website_latency_repair
  description: "Create/clear Repairs issue when website response times degrade."
  mode: queued
  trigger:
    - platform: state
      entity_id: binary_sensor.infra_website_latency_degraded
  action:
    - if:
        - condition: template
          value_template: "{{ trigger.to_state.state == 'on' }}"
      then:
        - service: repairs.create
          data:
            issue_id: infra_website_latency_degraded
            title: "Website latency degraded"
            description: >
              At least one monitored website reports avg_response_time_1d above 1.2s.
              Review Uptime Kuma response-time entities on Website Health.
            severity: warning
            persistent: true
      else:
        # Removal is best-effort: the run continues if this call errors.
        - service: repairs.remove
          continue_on_error: true
          data:
            issue_id: infra_website_latency_degraded
|
|
|
|
# Runs at 06:15 every day: calls rest_command.bearclaw_duplicati_verify, logs
# the returned summary, then either clears the Repairs issue (healthy) or
# creates it and dispatches Joanna (anything else).
# The verifier call is marked continue_on_error so that an error raised by the
# call itself still reaches the failure branch instead of silently aborting
# the run; every template therefore tolerates a missing response.
- alias: "Infrastructure - Backup Nightly Verification"
  id: infra_backup_nightly_verification
  description: "Use codex_appliance to verify the latest Duplicati run and dispatch Joanna only on failure."
  mode: single
  trigger:
    - platform: time
      at: "06:15:00"
  action:
    - variables:
        trigger_context: "HA automation infra_backup_nightly_verification (Infrastructure - Backup Nightly Verification)"
        duplicati_state: "{{ states('switch.duplicati_container') }}"
    - action: rest_command.bearclaw_duplicati_verify
      continue_on_error: true
      data:
        reason: "ha_nightly"
      response_variable: duplicati_verify
    - service: script.send_to_logbook
      data:
        topic: "BACKUP"
        message: >-
          {% set payload = duplicati_verify['content'] if duplicati_verify is defined and duplicati_verify is mapping and duplicati_verify['content'] is mapping else {} %}
          {% set detail = payload['detail'] if payload is mapping and payload['detail'] is mapping else {} %}
          {{ detail.get('summary', 'Nightly Duplicati verification completed.') }}
    - variables:
        # Each value degrades to an empty mapping / default when the response
        # is missing or not shaped as expected.
        verify_payload: "{{ duplicati_verify['content'] if duplicati_verify is defined and duplicati_verify is mapping and duplicati_verify['content'] is mapping else {} }}"
        verify_detail: "{{ verify_payload['detail'] if verify_payload is mapping and verify_payload['detail'] is mapping else {} }}"
        verify_http_status: "{{ duplicati_verify['status'] | int(0) if duplicati_verify is defined and duplicati_verify is mapping else 0 }}"
        verify_healthy: "{{ verify_payload.get('ok', false) and verify_detail.get('healthy', false) }}"
        verify_status: "{{ verify_detail.get('status', 'unknown') }}"
        verify_summary: "{{ verify_detail.get('summary', 'Duplicati verification did not return a summary.') }}"
        verify_issue: "{{ verify_detail.get('issue', verify_payload.get('error', 'duplicati_verify_failed')) }}"
        verify_backup_name: "{{ verify_detail.get('backupName', 'Docker_Configs') }}"
        # `or {}` keeps the later .get('endedAt') calls safe when the key is
        # present but null.
        verify_latest_result: "{{ (verify_detail.get('latestResult') or {}) if verify_detail is mapping else {} }}"
        verify_last_success: "{{ (verify_detail.get('lastSuccessfulRun') or {}) if verify_detail is mapping else {} }}"
    - choose:
        - conditions: "{{ verify_healthy }}"
          sequence:
            - service: repairs.remove
              continue_on_error: true
              data:
                issue_id: infra_duplicati_backup_failure
      default:
        - service: repairs.create
          data:
            issue_id: infra_duplicati_backup_failure
            title: "Duplicati nightly backup verification failed"
            description: >-
              {{ verify_summary }}
              Backup={{ verify_backup_name }};
              status={{ verify_status }};
              last_result={{ verify_latest_result.get('endedAt', 'n/a') }};
              last_success={{ verify_last_success.get('endedAt', 'n/a') }}.
            severity: error
            persistent: true
        - service: script.joanna_dispatch
          data:
            trigger_context: "{{ trigger_context }}"
            source: "home_assistant_automation.infra_backup_nightly_verification"
            summary: "Nightly Duplicati backup verification failed"
            entity_ids:
              - "switch.duplicati_container"
            diagnostics: >-
              scheduled_time=06:15:00,
              duplicati_container={{ duplicati_state }},
              verifier_http_status={{ verify_http_status }},
              verifier_status={{ verify_status }},
              verifier_issue={{ verify_issue }},
              backup_name={{ verify_backup_name }},
              latest_result={{ verify_latest_result.get('endedAt', 'n/a') }},
              last_success={{ verify_last_success.get('endedAt', 'n/a') }}
            request: >-
              Investigate the Duplicati backup job {{ verify_backup_name }}.
              The codex_appliance verifier reported status {{ verify_status }} with issue {{ verify_issue }}.
              Use the Duplicati API or UI directly, resolve the failure if possible, and verify a successful run before closing out.
              Reply with explicit status fields:
              resolved=true/false,
              backup_status,
              last_success_time,
              root_cause,
              action_taken,
              verification,
              next_action_required=true/false.
|
|
|
|
# Fires at 03:20 and proceeds only on the first day of the month: dispatches a
# log-hygiene review request to Joanna, then records the dispatch under the
# HOME ASSISTANT logbook topic.
- alias: "Infrastructure - Monthly HA Log Hygiene Review"
  id: infra_monthly_log_hygiene_review
  description: "Ask Joanna monthly to review Home Assistant logs, create a GitHub issue with noisy entries, and send Telegram recommendations only."
  mode: single
  variables:
    trigger_context: "HA automation infra_monthly_log_hygiene_review (Infrastructure - Monthly HA Log Hygiene Review)"
  trigger:
    - platform: time
      at: "03:20:00"
  # The time trigger fires daily; this template limits runs to day 1.
  condition: "{{ now().day == 1 }}"
  action:
    - service: script.joanna_dispatch
      data:
        source: "home_assistant_automation.infra_monthly_log_hygiene_review"
        trigger_context: "{{ trigger_context }}"
        summary: "Monthly Home Assistant log hygiene review with GitHub issue and Telegram follow-up"
        diagnostics: >-
          schedule=day_1@03:20:00,
          review_scope=available_home_assistant_logs,
          desired_outputs=telegram_follow_up+github_issue,
          github_repo=CCOSTAN/Home-AssistantConfig,
          approval_required_before_changes=true
        request: >-
          Review the available Home Assistant log files from the last month and identify noisy,
          low-value entries that could be safely suppressed, filtered, slowed, deduplicated, or
          retired. Focus on practical Home Assistant-side changes such as recorder exclusions,
          logger filtering, scan-interval reductions, entity retirement, or automation de-noising.
          Create or refresh a GitHub issue in CCOSTAN/Home-AssistantConfig that captures the noisy
          entries, estimated frequency, why each candidate is low-value, and the exact repo files
          or integrations likely to change. Then send Carlo a concise Telegram summary with the top
          recommendations and the GitHub issue number or link. Do not make any changes from this
          review. Wait for explicit follow-up approval first.
    - service: script.send_to_logbook
      data:
        topic: "HOME ASSISTANT"
        message: "Joanna monthly Home Assistant log hygiene review dispatched; Telegram summary and GitHub issue requested."
|