diff --git a/config/dashboards/infrastructure/partials/docker_containers_sections.yaml b/config/dashboards/infrastructure/partials/docker_containers_sections.yaml index 7b05e4fa..3417b05e 100644 --- a/config/dashboards/infrastructure/partials/docker_containers_sections.yaml +++ b/config/dashboards/infrastructure/partials/docker_containers_sections.yaml @@ -8,6 +8,7 @@ # Sections layout for the Docker containers view. # ------------------------------------------------------------------- # Notes: Auto-discovers Portainer container entities from `switch.*_container`. +# Notes: Keeps cards visible when Portainer telemetry is unavailable (degraded mode). ###################################################################### - type: grid @@ -80,6 +81,20 @@ card_mod: style: !include /config/dashboards/infrastructure/card_mod/infra_panel.yaml cards: + - type: custom:button-card + template: bearstone_infra_alert_row + entity: binary_sensor.docker_container_telemetry_degraded + name: Docker telemetry degraded + icon: mdi:lan-disconnect + variables: + alert_kind: binary_on + state_display: >- + [[[ + const unavailable = states['sensor.docker_monitored_unavailable_count']?.state ?? '0'; + const total = states['sensor.docker_monitored_container_count']?.state ?? '0'; + return `${unavailable}/${total} unavailable`; + ]]] + - type: custom:auto-entities show_empty: true grid_options: @@ -104,7 +119,5 @@ type: custom:button-card template: bearstone_infra_container_row icon: mdi:docker - exclude: - - state: unavailable sort: method: name diff --git a/config/dashboards/infrastructure/partials/home_sections.yaml b/config/dashboards/infrastructure/partials/home_sections.yaml index a5ecec9c..1f61ecf9 100644 --- a/config/dashboards/infrastructure/partials/home_sections.yaml +++ b/config/dashboards/infrastructure/partials/home_sections.yaml @@ -400,6 +400,23 @@ tap_action: action: none + - type: custom:button-card + template: bearstone_infra_alert_row + entity: binary_sensor.docker_container_telemetry_degraded + name: Docker telemetry degraded + icon: mdi:lan-disconnect + variables: + alert_kind: binary_on + tap_action: + action: navigate + navigation_path: /dashboard-infrastructure/docker + state_display: >- + [[[ + const unavailable = states['sensor.docker_monitored_unavailable_count']?.state ?? '0'; + const total = states['sensor.docker_monitored_container_count']?.state ?? '0'; + return `${unavailable}/${total} unavailable`; + ]]] + - type: custom:auto-entities show_empty: false card: @@ -424,6 +441,8 @@ icon: mdi:docker exclude: - state: 'on' + - state: unavailable + - state: unknown - type: custom:vertical-stack-in-card grid_options: diff --git a/config/dashboards/infrastructure/templates/button_card_templates.yaml b/config/dashboards/infrastructure/templates/button_card_templates.yaml index 30d9b9d1..b82a936c 100644 --- a/config/dashboards/infrastructure/templates/button_card_templates.yaml +++ b/config/dashboards/infrastructure/templates/button_card_templates.yaml @@ -216,6 +216,8 @@ bearstone_infra_container_row: image: > [[[ const ent = (entity && entity.entity_id) ? String(entity.entity_id) : ''; + const stateNow = String(entity && entity.state !== undefined ? entity.state : '').toLowerCase(); + const telemetryDegraded = states['binary_sensor.docker_container_telemetry_degraded']?.state === 'on'; let key = ''; if (ent.startsWith('binary_sensor.') && ent.endsWith('_status')) { key = ent.replace('binary_sensor.', '').replace(/_status$/, ''); @@ -227,6 +229,9 @@ bearstone_infra_container_row: : (key ? `sensor.${key}_image` : ''); const imageValue = states[imageEntity]?.state; if (!imageValue || ['unknown', 'unavailable', 'none', ''].includes(String(imageValue).toLowerCase())) { + if (telemetryDegraded && ['unknown', 'unavailable', ''].includes(stateNow)) { + return 'telemetry: delayed'; + } return 'image: n/a'; } return imageValue; @@ -234,10 +239,11 @@ bearstone_infra_container_row: status: > [[[ const s = String(entity.state || '').toLowerCase(); + const telemetryDegraded = states['binary_sensor.docker_container_telemetry_degraded']?.state === 'on'; if (s === 'on' || s === 'running') return 'RUNNING'; if (s === 'off' || s === 'stopped') return 'STOPPED'; - if (s === 'unavailable') return 'OFFLINE'; - if (s === 'unknown' || s === '') return 'UNKNOWN'; + if (s === 'unavailable') return telemetryDegraded ? 'STALE' : 'OFFLINE'; + if (s === 'unknown' || s === '') return telemetryDegraded ? 'STALE' : 'UNKNOWN'; return String(entity.state).toUpperCase(); ]]] styles: @@ -359,14 +365,25 @@ bearstone_infra_container_row: - value: unavailable styles: card: - - border-color: rgba(229,57,53,0.35) - - background: rgba(255,235,238,0.85) + - border-color: rgba(245,124,0,0.35) + - background: rgba(255,243,224,0.85) icon: - - color: rgba(198,40,40,1) + - color: rgba(230,81,0,1) custom_fields: status: - - background: rgba(198,40,40,0.10) - - color: rgba(198,40,40,1) + - background: rgba(230,81,0,0.12) + - color: rgba(230,81,0,1) + - value: unknown + styles: + card: + - border-color: rgba(245,124,0,0.35) + - background: rgba(255,243,224,0.85) + icon: + - color: rgba(230,81,0,1) + custom_fields: + status: + - background: rgba(230,81,0,0.12) + - color: rgba(230,81,0,1) bearstone_infra_panel_header: show_icon: false diff --git a/config/packages/README.md b/config/packages/README.md index 705fbc89..02441df0 100755 --- a/config/packages/README.md +++ b/config/packages/README.md @@ -45,7 +45,7 @@ Live collection of plug-and-play Home Assistant packages. Each YAML file in this | [lightning.yaml](lightning.yaml) | Blitzortung lightning counter monitoring with snoozeable push actions. | `sensor.blitzortung_lightning_counter`, `input_boolean.snooze_lightning`, notify engine actions | | [logbook_activity_feed.yaml](logbook_activity_feed.yaml) | Dummy `sensor.activity_feed` + helper to write clean Activity entries (Issue #1550). | `sensor.activity_feed`, `script.send_to_logbook` | | [mariadb_monitoring.yaml](mariadb_monitoring.yaml) | MariaDB health sensors and Lovelace dashboard snippet for recorder stats. | `sensor.mariadb_status`, `sensor.database_size` | -| [docker_infrastructure.yaml](docker_infrastructure.yaml) | Docker host patching telemetry (docker_10/14/17/69) + host-side auto-reboots + container-down Repairs alerts. | `sensor.docker_*_apt_status`, `repairs.create`, `repairs.remove` | +| [docker_infrastructure.yaml](docker_infrastructure.yaml) | Docker host patching telemetry (docker_10/14/17/69) + host-side auto-reboots + container-down Repairs alerts, with degraded-telemetry guardrails when Portainer data drops. | `sensor.docker_*_apt_status`, `binary_sensor.docker_container_telemetry_degraded`, `repairs.create`, `repairs.remove` | | [infrastructure_observability.yaml](infrastructure_observability.yaml) | Normalized WAN/DNS/backup/domain/cert health sensors used by the Infrastructure Home + Website Health dashboards. | `binary_sensor.infra_*`, `sensor.infra_*`, `script.send_to_logbook` | | [onenote_indexer.yaml](onenote_indexer.yaml) | OneNote indexer health/status monitoring for Joanna, failure-repair automation, and a daily duplicate-delete maintenance request. | `sensor.onenote_indexer_last_job_status`, `binary_sensor.onenote_indexer_last_job_successful` | | [mariadb.yaml](mariadb.yaml) | MariaDB recorder health and capacity SQL sensors. | `sensor.mariadb_status`, `sensor.database_size` | diff --git a/config/packages/docker_infrastructure.yaml b/config/packages/docker_infrastructure.yaml index b5fed89a..92b7cc6e 100644 --- a/config/packages/docker_infrastructure.yaml +++ b/config/packages/docker_infrastructure.yaml @@ -200,12 +200,47 @@ template: {% endif %} - sensor: + - name: "Docker Monitored Container Count" + unique_id: docker_monitored_container_count + icon: mdi:format-list-numbered + state: >- + {{ state_attr('group.docker_monitored_containers', 'entity_id') | default([], true) | count }} + + - name: "Docker Monitored Unavailable Count" + unique_id: docker_monitored_unavailable_count + icon: mdi:lan-disconnect + state: >- + {% set ns = namespace(keys=[], unavailable=0) %} + {% set monitored = state_attr('group.docker_monitored_containers', 'entity_id') | default([], true) %} + {% for switch_entity in monitored %} + {% set key = switch_entity | replace('switch.', '') | regex_replace('_container$', '') %} + {% if key not in ns.keys %} + {% set ns.keys = ns.keys + [key] %} + {% endif %} + {% endfor %} + {% for key in ns.keys %} + {% set status_entity = 'binary_sensor.' ~ key ~ '_status' %} + {% set switch_entity = 'switch.' ~ key ~ '_container' %} + {% if expand(status_entity) | count > 0 %} + {% set effective_state = states(status_entity) | lower %} + {% elif expand(switch_entity) | count > 0 %} + {% set effective_state = states(switch_entity) | lower %} + {% else %} + {% set effective_state = 'unknown' %} + {% endif %} + {% if effective_state == 'unavailable' %} + {% set ns.unavailable = ns.unavailable + 1 %} + {% endif %} + {% endfor %} + {{ ns.unavailable }} + - name: "Docker Containers Down List" unique_id: docker_containers_down_list icon: mdi:docker state: >- {% set ns = namespace(keys=[], down=[]) %} {% set monitored = state_attr('group.docker_monitored_containers', 'entity_id') | default([], true) %} + {% set telemetry_degraded = is_state('binary_sensor.docker_container_telemetry_degraded', 'on') %} {% for switch_entity in monitored %} {% set key = switch_entity | replace('switch.', '') | regex_replace('_container$', '') %} {% if key not in ns.keys %} @@ -222,7 +257,9 @@ template: {% else %} {% set effective_state = 'unknown' %} {% endif %} - {% if effective_state in ['off', 'unknown', 'unavailable'] %} + {% if effective_state in ['off', 'stopped'] %} + {% set ns.down = ns.down + [key] %} + {% elif not telemetry_degraded and effective_state in ['unknown', 'unavailable'] %} {% set ns.down = ns.down + [key] %} {% endif %} {% endfor %} @@ -241,6 +278,16 @@ template: {% endif %} - binary_sensor: + - name: "Docker Container Telemetry Degraded" + unique_id: docker_container_telemetry_degraded + device_class: problem + icon: mdi:lan-disconnect + state: >- + {% set total = states('sensor.docker_monitored_container_count') | int(0) %} + {% set unavailable = states('sensor.docker_monitored_unavailable_count') | int(0) %} + {% set threshold = [3, ((total * 0.6) | round(0, 'ceil') | int(0))] | max %} + {{ total > 0 and unavailable >= threshold }} + - name: "Docker Container Alerts Snoozed" unique_id: docker_container_alerts_snoozed device_class: problem @@ -266,7 +313,7 @@ script: example: 5 sequence: - variables: - down_states: ['off', 'unknown', 'unavailable'] + down_states: ['off', 'stopped', 'unknown', 'unavailable'] src_entity: "{{ entity_id | default('', true) }}" op: "{{ operation | default('create', true) | lower }}" wait_minutes: "{{ delay_minutes | default(0) | int(0) }}" @@ -304,9 +351,12 @@ script: minutes: "{{ wait_minutes }}" - variables: effective_state: "{{ states(effective_entity) | lower }}" + telemetry_degraded: "{{ is_state('binary_sensor.docker_container_telemetry_degraded', 'on') }}" container_name: "{{ state_attr(effective_entity, 'friendly_name') | default(container_key, true) }}" - condition: template - value_template: "{{ effective_state in down_states }}" + value_template: >- + {{ effective_state in down_states and + not (telemetry_degraded and effective_state in ['unknown', 'unavailable']) }} - condition: state entity_id: binary_sensor.docker_container_alerts_snoozed state: "off" @@ -453,12 +503,15 @@ automation: value_template: "{{ trigger.event.data.old_state.state != trigger.event.data.new_state.state }}" action: - variables: - down_states: ['off', 'unknown', 'unavailable'] + down_states: ['off', 'stopped', 'unknown', 'unavailable'] entity_id: "{{ trigger.event.data.entity_id }}" old_state: "{{ trigger.event.data.old_state.state | lower }}" new_state: "{{ trigger.event.data.new_state.state | lower }}" - choose: - - conditions: "{{ new_state in down_states and old_state not in down_states }}" + - conditions: >- + {{ new_state in down_states and old_state not in down_states and + not (is_state('binary_sensor.docker_container_telemetry_degraded', 'on') and + new_state in ['unknown', 'unavailable']) }} sequence: - service: script.docker_container_repairs_sync data: