"""Start Amazon Transcribe job with language selection."""
import os

import boto3

from shared import S3_BUCKET, update_job

transcribe = boto3.client('transcribe')

# Candidate languages passed as LanguageOptions when automatic language
# identification is requested. Cantonese is listed as 'zh-HK' (the same code
# LANG_MAP uses); 'yue-CN' is not an Amazon Transcribe language code and
# would cause the request to be rejected.
AUTO_DETECT_LANGUAGES = ['zh-CN', 'zh-TW', 'zh-HK', 'en-US']

# Amazon Transcribe language code mapping.
LANG_MAP = {
    'zh-CN': 'zh-CN',
    'zh-TW': 'zh-TW',
    'zh-HK': 'zh-HK',
    'en-US': 'en-US',
}


def handler(event, context):
    """Start a transcription job for the audio file described by *event*.

    Args:
        event: Mapping with the required keys ``job_id`` and
            ``audio_s3_key`` and the optional key ``language``. When
            ``language`` is missing, empty, or ``'auto'``, Transcribe is
            asked to identify the language among ``AUTO_DETECT_LANGUAGES``;
            otherwise the value is translated through ``LANG_MAP`` (unknown
            values are passed through unchanged) and sent as
            ``LanguageCode``.
        context: Unused.

    Returns:
        A copy of *event* with an added ``transcribe_job`` key holding the
        transcription job name (``sp-<job_id>``).

    Raises:
        KeyError: If ``job_id`` or ``audio_s3_key`` is missing from *event*.
        Any exception raised by ``update_job`` or by the Transcribe client
        propagates to the caller.
    """
    job_id = event['job_id']
    audio_s3_key = event['audio_s3_key']
    # Treat an explicit None / empty string the same as a missing key so that
    # a falsy value is never forwarded to Transcribe as LanguageCode.
    language = event.get('language') or 'auto'

    transcribe_job = f"sp-{job_id}"

    # The job record is marked before the Transcribe request is issued.
    update_job(job_id, status='TRANSCRIBING')

    params = {
        'TranscriptionJobName': transcribe_job,
        'Media': {'MediaFileUri': f"s3://{S3_BUCKET}/{audio_s3_key}"},
        'OutputBucketName': S3_BUCKET,
        'OutputKey': f"jobs/{job_id}/transcribe-output.json",
        'Settings': {
            'ShowSpeakerLabels': True,
            'MaxSpeakerLabels': 10,
        },
    }

    if language == 'auto':
        # Automatic language identification: supply the candidate list.
        params['IdentifyLanguage'] = True
        params['LanguageOptions'] = AUTO_DETECT_LANGUAGES
    else:
        # Explicitly specified language.
        params['LanguageCode'] = LANG_MAP.get(language, language)

    transcribe.start_transcription_job(**params)

    return {**event, 'transcribe_job': transcribe_job}