#!/usr/bin/python3 """ CountyIDGen.py by Mason Nelson =============================================================================== This script is a utility for generating WAV audio files corresponding to each county code defined in the SkywarnPlus config.yaml. The audio files are generated using the Voice RSS Text-to-Speech API and the settings defined in the config.yaml. This script will generate the files, save them in the correct location, and automatically modify the SkywarnPlus config.yaml to utilize them. This file is part of SkywarnPlus. SkywarnPlus is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. SkywarnPlus is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with SkywarnPlus. If not, see . """ import os import io import re import sys import requests import logging import zipfile from datetime import datetime from ruamel.yaml import YAML from pydub import AudioSegment from pydub.silence import split_on_silence # Initialize YAML yaml = YAML() # Directories and Paths BASE_DIR = os.path.dirname(os.path.realpath(__file__)) CONFIG_PATH = os.path.join(BASE_DIR, "config.yaml") COUNTY_CODES_PATH = os.path.join(BASE_DIR, "CountyCodes.md") # Load configurations with open(CONFIG_PATH, "r") as config_file: config = yaml.load(config_file) # Logging setup LOG_CONFIG = config.get("Logging", {}) ENABLE_DEBUG = LOG_CONFIG.get("Debug", False) LOG_FILE = LOG_CONFIG.get("LogPath", os.path.join(BASE_DIR, "SkywarnPlus.log")) # Set up logging LOGGER = logging.getLogger(__name__) LOGGER.setLevel(logging.DEBUG if ENABLE_DEBUG else logging.INFO) # Set up log message formatting LOG_FORMATTER = logging.Formatter("%(asctime)s %(levelname)s %(message)s") # Set up console log handler C_HANDLER = logging.StreamHandler() C_HANDLER.setFormatter(LOG_FORMATTER) LOGGER.addHandler(C_HANDLER) # Ensure the directory for the log file exists log_directory = os.path.dirname(LOG_FILE) if not os.path.exists(log_directory): os.makedirs(log_directory) # Set up file log handler F_HANDLER = logging.FileHandler(LOG_FILE) F_HANDLER.setFormatter(LOG_FORMATTER) LOGGER.addHandler(F_HANDLER) # Extract API parameters from the config API_KEY = config["SkyDescribe"]["APIKey"] LANGUAGE = config["SkyDescribe"]["Language"] SPEED = str(config["SkyDescribe"]["Speed"]) VOICE = config["SkyDescribe"]["Voice"] SOUNDS_PATH = config.get("Alerting", {}).get( "SoundsPath", os.path.join(BASE_DIR, "SOUNDS") ) def generate_wav(api_key, language, speed, voice, text, output_file): """ Convert the given text to audio using the Voice RSS Text-to-Speech API and trims silence. """ base_url = "http://api.voicerss.org/" params = { "key": api_key, "hl": language, "src": text, "c": "WAV", "f": "8khz_16bit_mono", "r": str(speed), "v": voice, } response = requests.get(base_url, params=params) response.raise_for_status() # If the response text contains "ERROR" then log it and exit if "ERROR" in response.text: LOGGER.error("SkyDescribe: %s", response.text) sys.exit(1) # Load the audio data into pydub's AudioSegment sound = AudioSegment.from_wav(io.BytesIO(response.content)) # Normalize the entire audio clip target_dBFS = -6.0 gain_difference = target_dBFS - sound.max_dBFS sound = sound.apply_gain(gain_difference) # Split track where silence is 100ms or more and get chunks chunks = split_on_silence(sound, min_silence_len=200, silence_thresh=-40) # If there are chunks, concatenate all of them if chunks: combined_sound = sum(chunks, AudioSegment.empty()) # Export the combined audio combined_sound.export(output_file, format="wav") else: # If there are no chunks, just save the original audio sound.export(output_file, format="wav") def sanitize_text_for_tts(text): """ Sanitize the text for TTS processing. Remove characters that aren't alphanumeric or whitespace. """ sanitized_text = re.sub(r"[^a-zA-Z0-9\s]", "", text) return sanitized_text def backup_existing_files(path, filename_pattern, backup_name): """ Backup files matching the filename pattern in the specified path to a zip file. """ files_to_backup = [ f for f in os.listdir(path) if f.startswith(filename_pattern) and f.endswith(".wav") ] if not files_to_backup: return with zipfile.ZipFile(backup_name, "w") as zipf: for file in files_to_backup: zipf.write(os.path.join(path, file), file) def process_county_codes(): """ Process county codes and make changes. """ new_county_codes = [] for entry in config["Alerting"]["CountyCodes"]: overwrite = False if isinstance(entry, str): # County code without WAV file county_code = entry elif isinstance(entry, dict): # County code with WAV file county_code = list(entry.keys())[0] county_name = county_data.get(county_code) sanitized_county_name = sanitize_text_for_tts(county_name) expected_wav_file = "{}.wav".format(sanitized_county_name) if os.path.exists(os.path.join(SOUNDS_PATH, expected_wav_file)): if not overwrite: user_input = input( "The WAV file for {} ({}) already exists. Do you want to overwrite it? [yes/no]: ".format( county_name, expected_wav_file ) ).lower() if user_input != "yes": LOGGER.info( "Skipping generation for {} due to user input.".format( county_name ) ) new_county_codes.append({county_code: expected_wav_file}) continue # Skip to the next county code overwrite = True # At this point, we are sure that we either have a new county code or the user has agreed to overwrite. county_name = county_data.get(county_code) if county_name: sanitized_county_name = sanitize_text_for_tts(county_name) output_file = os.path.join( SOUNDS_PATH, "{}.wav".format(sanitized_county_name) ) generate_wav( API_KEY, LANGUAGE, SPEED, VOICE, sanitized_county_name, output_file ) # Add the mapping for the county code to the new list new_county_codes.append( {county_code: "{}.wav".format(sanitized_county_name)} ) # Replace the old CountyCodes list with the new one config["Alerting"]["CountyCodes"] = new_county_codes def load_county_codes_from_md(md_file_path): """ Load county names from the MD file and return a dictionary mapping county codes to county names. """ with open(md_file_path, "r") as file: lines = file.readlines() county_data = {} for line in lines: if line.startswith("|") and "County Name" not in line and "-----" not in line: _, county_name, code, _ = line.strip().split("|") county_data[code.strip()] = county_name.strip() return county_data def display_initial_warning(): warning_message = """ ============================================================ WARNING: Please read the following information carefully before proceeding. This utility is designed to generate WAV audio files corresponding to each county code defined in the SkywarnPlus config.yaml using the Voice RSS Text-to-Speech API. The generated audio files will be saved in the appropriate location, and the SkywarnPlus config.yaml will be automatically updated to use them. However, a few things to keep in mind: - The script will only attempt to generate WAV files for county codes that are defined in the config. - Pronunciations for some county names might not be accurate. In such cases, you may need to manually create the files using VoiceRSS. This might involve intentionally misspelling the county name to achieve the desired pronunciation. - This script will attempt to backup any files before it modifies them, but it is always a good idea to manually back up your existing configuration and files before running this script. - This script will modify your config.yaml file, so you should ALWAYS double check the changes it makes. There might be improperly formatted indentations, comments, etc. that you will need to fix manually. Proceed with caution. ============================================================ """ print(warning_message) # Display the initial warning display_initial_warning() # Wait for user acknowledgment before proceeding. user_input = input("Do you want to proceed? [yes/no]: ").lower() if user_input != "yes": LOGGER.info("Aborting process due to user input.") sys.exit() # Load county names and generate WAV files backup_date = datetime.now().strftime("%Y%m%d") backup_name = os.path.join(SOUNDS_PATH, "CountyID_Backup_{}.zip".format(backup_date)) backup_existing_files(SOUNDS_PATH, "", backup_name) # Load county codes and names county_data = load_county_codes_from_md(COUNTY_CODES_PATH) # Call the function to process the county codes process_county_codes() # Update config.yaml to reflect the WAV file mappings for i, county_code in enumerate(config["Alerting"]["CountyCodes"]): if isinstance(county_code, str): county_name = county_data.get(county_code) if county_name: sanitized_county_name = sanitize_text_for_tts(county_name) config["Alerting"]["CountyCodes"][i] = { county_code: "{}.wav".format(sanitized_county_name) } # Write the updated config.yaml with open(CONFIG_PATH, "w") as config_file: yaml.indent(sequence=4, offset=2) yaml.dump(config, config_file) LOGGER.info("County WAV files generation completed.")