| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702
70370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145114611471148114911501151115211531154115511561157115811591160116111621163116411651166116711681169117011711172117311741175117611771178117911801181118211831184118511861187118811891190119111921193119411951196119711981199120012011202120312041205120612071208120912101211121212131214121512161217121812191220122112221223122412251226122712281229123012311232123312341235123612371238123912401241124212431244124512461247124812491250125112521253125412551256125712581259126012611262126312641265126612671268126912701271127212731274127512761
27712781279128012811282128312841285128612871288128912901291129212931294129512961297129812991300130113021303130413051306130713081309131013111312131313141315131613171318131913201321132213231324132513261327132813291330133113321333133413351336133713381339134013411342134313441345134613471348134913501351135213531354135513561357135813591360136113621363136413651366136713681369137013711372137313741375137613771378137913801381138213831384138513861387138813891390139113921393139413951396139713981399140014011402140314041405140614071408140914101411141214131414141514161417141814191420142114221423142414251426142714281429143014311432143314341435143614371438143914401441144214431444144514461447144814491450145114521453145414551456145714581459146014611462146314641465146614671468146914701471147214731474147514761477147814791480148114821483148414851486148714881489149014911492149314941495149614971498149915001501150215031504150515061507150815091510151115121513151415151516151715181519152015211522152315241525152615271528152915301531153215331534153515361537153815391540154115421543154415451546154715481549155015511552155315541555155615571558155915601561156215631564156515661567156815691570157115721573157415751576157715781579158015811582158315841585158615871588158915901591159215931594159515961597159815991600160116021603160416051606160716081609161016111612161316141615161616171618161916201621162216231624162516261627162816291630163116321633163416351636163716381639164016411642164316441645164616471648164916501651165216531654165516561657165816591660166116621663166416651666166716681669167016711672167316741675167616771678167916801681168216831684168516861687168816891690169116921693169416951696169716981699170017011702170317041705170617071708170917101711171217131714171517161717171817191720172117221723172417251726172717281729173017311732173317341735173617371738173917401741174217431744174517461747174817491750175117521753175417551756175717581759176017611762176317641765176617671768176917701771177217731774177517761
7771778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955 |
- """
- Report Generator Service
- This module handles Word document generation from AWS scan results.
- It loads templates, replaces placeholders, generates tables, and produces
- the final report document.
- """
- import os
- import re
- import copy
- from datetime import datetime
- from typing import Dict, List, Any, Optional, Tuple
- from docx import Document
- from docx.shared import Inches, Pt, Cm
- from docx.enum.text import WD_ALIGN_PARAGRAPH
- from docx.enum.table import WD_TABLE_ALIGNMENT
- from docx.oxml.ns import qn
- from docx.oxml import OxmlElement
class TableLayout:
    """Names of the two table orientations a service can be rendered with."""

    # Column headers run across the top; each resource is one data row.
    HORIZONTAL = 'horizontal'
    # Attribute names run down the left column with the values beside them.
    VERTICAL = 'vertical'
# Service configuration matching the design document.
# Every entry carries three keys:
#   'layout'  - one of the TableLayout constants
#   'title'   - text of the merged title row of the table
#   'columns' - ordered column headers (horizontal) or attribute names (vertical)
def _horizontal(title, *columns):
    """Build a SERVICE_CONFIG entry that uses the horizontal layout."""
    return {'layout': TableLayout.HORIZONTAL, 'title': title, 'columns': list(columns)}


def _vertical(title, *columns):
    """Build a SERVICE_CONFIG entry that uses the vertical layout."""
    return {'layout': TableLayout.VERTICAL, 'title': title, 'columns': list(columns)}


SERVICE_CONFIG = {
    # ===== VPC Related Resources =====
    'vpc': _horizontal('VPC', 'Region', 'Name', 'ID', 'CIDR'),
    'subnet': _horizontal('Subnet', 'Name', 'ID', 'AZ', 'CIDR'),
    'route_table': _horizontal('Route Table', 'Name', 'ID', 'Subnet Associations'),
    'internet_gateway': _horizontal('Internet Gateway', 'Name', 'ID'),
    'nat_gateway': _horizontal('NAT Gateway', 'Name', 'ID', 'Public IP', 'Private IP'),
    'security_group': _horizontal(
        'Security Group', 'Name', 'ID', 'Protocol', 'Port range', 'Source'),
    'vpc_endpoint': _horizontal('Endpoint', 'Name', 'ID', 'VPC', 'Service Name', 'Type'),
    'vpc_peering': _horizontal(
        'VPC Peering', 'Name', 'Peering Connection ID', 'Requester VPC', 'Accepter VPC'),
    'customer_gateway': _horizontal(
        'Customer Gateway', 'Name', 'Customer Gateway ID', 'IP Address'),
    'virtual_private_gateway': _horizontal(
        'Virtual Private Gateway', 'Name', 'Virtual Private Gateway ID', 'VPC'),
    'vpn_connection': _horizontal('VPN Connection', 'Name', 'VPN ID', 'Routes'),

    # ===== EC2 Related Resources =====
    'ec2': _vertical(
        'Instance',
        'Name', 'Instance ID', 'Instance Type', 'AZ', 'AMI',
        'Public IP', 'Public DNS', 'Private IP', 'VPC ID', 'Subnet ID',
        'Key', 'Security Groups', 'EBS Type', 'EBS Size', 'Encryption',
        'Other Requirement'),
    'elastic_ip': _horizontal('Elastic IP', 'Name', 'Elastic IP'),

    # ===== Auto Scaling =====
    'autoscaling': _vertical(
        'Auto Scaling Group',
        'Name', 'Launch Template', 'AMI', 'Instance type',
        'Key', 'Target Groups', 'Desired', 'Min', 'Max',
        'Scaling Policy'),

    # ===== ELB Related Resources =====
    'elb': _vertical(
        'Load Balancer',
        'Name', 'Type', 'DNS', 'Scheme', 'VPC',
        'Availability Zones', 'Subnet', 'Security Groups'),
    'target_group': _vertical(
        'Target Group',
        'Load Balancer', 'TG Name', 'Port', 'Protocol',
        'Registered Instances', 'Health Check Path'),

    # ===== RDS =====
    'rds': _vertical(
        'DB Instance',
        'Region', 'Endpoint', 'DB instance ID', 'DB name',
        'Master Username', 'Port', 'DB Engine', 'DB Version',
        'Instance Type', 'Storage type', 'Storage', 'Multi-AZ',
        'Security Group', 'Deletion Protection',
        'Performance Insights Enabled', 'CloudWatch Logs'),

    # ===== ElastiCache =====
    'elasticache': _vertical(
        'Cache Cluster',
        'Cluster ID', 'Engine', 'Engine Version', 'Node Type',
        'Num Nodes', 'Status'),

    # ===== EKS =====
    'eks': _vertical('Cluster', 'Cluster Name', 'Version', 'Status', 'Endpoint', 'VPC ID'),

    # ===== Lambda =====
    'lambda': _horizontal(
        'Function',
        'Function Name', 'Runtime', 'Memory (MB)', 'Timeout (s)', 'Last Modified'),

    # ===== S3 =====
    's3': _horizontal('Bucket', 'Region', 'Bucket Name'),
    's3_event_notification': _vertical(
        'S3 event notification',
        'Bucket', 'Name', 'Event Type', 'Destination type', 'Destination'),

    # ===== CloudFront (Global) =====
    'cloudfront': _vertical(
        'Distribution',
        'CloudFront ID', 'Domain Name', 'CNAME',
        'Origin Domain Name', 'Origin Protocol Policy',
        'Viewer Protocol Policy', 'Allowed HTTP Methods',
        'Cached HTTP Methods'),

    # ===== Route 53 (Global) =====
    'route53': _horizontal('Hosted Zone', 'Zone ID', 'Name', 'Type', 'Record Count'),

    # ===== ACM (Global) =====
    'acm': _horizontal('ACM', 'Domain name', 'Additional names'),

    # ===== WAF (Global) =====
    'waf': _horizontal(
        'Web ACL', 'WebACL Name', 'Scope', 'Rules Count', 'Associated Resources'),

    # ===== SNS =====
    'sns': _horizontal(
        'Topic',
        'Topic Name', 'Topic Display Name', 'Subscription Protocol',
        'Subscription Endpoint'),

    # ===== CloudWatch =====
    'cloudwatch': _horizontal(
        'Log Group', 'Log Group Name', 'Retention Days', 'Stored Bytes', 'KMS Encryption'),

    # ===== EventBridge =====
    'eventbridge': _horizontal('Rule', 'Name', 'Description', 'Event Bus', 'State'),

    # ===== CloudTrail =====
    'cloudtrail': _horizontal(
        'Trail', 'Name', 'Multi-Region Trail', 'Log File Validation', 'KMS Encryption'),

    # ===== Config =====
    'config': _horizontal(
        'Config', 'Name', 'Regional Resources', 'Global Resources', 'Retention period'),
}
# Order in which services appear in the report, one line per related family.
SERVICE_ORDER = [
    # VPC and networking
    'vpc', 'subnet', 'route_table', 'internet_gateway', 'nat_gateway',
    'security_group', 'vpc_endpoint', 'vpc_peering',
    'customer_gateway', 'virtual_private_gateway', 'vpn_connection',
    # Compute
    'ec2', 'elastic_ip', 'autoscaling',
    # Load balancing
    'elb', 'target_group',
    # Databases, caches and clusters
    'rds', 'elasticache', 'eks',
    # Functions and storage
    'lambda', 's3', 's3_event_notification',
    # Edge, DNS, certificates and firewall
    'cloudfront', 'route53', 'acm', 'waf',
    # Messaging, monitoring and auditing
    'sns', 'cloudwatch', 'eventbridge', 'cloudtrail', 'config',
]
# Services treated as global: they are not region-specific and should not be
# duplicated per region.
# NOTE(review): the SERVICE_CONFIG section comments also tag 'acm' as
# "(Global)", yet it is not listed here - confirm whether that is intended.
GLOBAL_SERVICES = [
    'cloudfront',
    'route53',
    'waf',
    's3',
    's3_event_notification',
    'cloudtrail',
]
# Service grouping for Heading 2 titles.
# Each row names a heading followed by the service keys rendered beneath it;
# SERVICE_GROUPS is the inverse lookup (service key -> heading).
_GROUP_MEMBERS = (
    # All VPC related resources share the "VPC" heading
    ('VPC', (
        'vpc', 'subnet', 'route_table', 'internet_gateway', 'nat_gateway',
        'security_group', 'vpc_endpoint', 'vpc_peering',
        'customer_gateway', 'virtual_private_gateway', 'vpn_connection',
    )),
    ('EC2', ('ec2', 'elastic_ip')),
    ('AutoScaling', ('autoscaling',)),
    # Load Balancer and Target Group share the "ELB" heading
    ('ELB', ('elb', 'target_group')),
    # Database services use their own service name as heading
    ('RDS', ('rds',)),
    ('Elasticache', ('elasticache',)),
    ('EKS', ('eks',)),
    ('Lambda', ('lambda',)),
    # Bucket and event notification share the "S3" heading
    ('S3', ('s3', 's3_event_notification')),
    # Global services
    ('CloudFront', ('cloudfront',)),
    ('Route53', ('route53',)),
    ('ACM', ('acm',)),
    ('WAF', ('waf',)),
    # Monitoring services
    ('SNS', ('sns',)),
    ('CloudWatch', ('cloudwatch',)),
    ('EventBridge', ('eventbridge',)),
    ('CloudTrail', ('cloudtrail',)),
    ('Config', ('config',)),
)

SERVICE_GROUPS = {
    service_key: heading
    for heading, members in _GROUP_MEMBERS
    for service_key in members
}
# Sequence of the service-group headings in the report.
SERVICE_GROUP_ORDER = [
    'VPC',
    'EC2',
    'AutoScaling',
    'ELB',
    'RDS',
    'Elasticache',
    'EKS',
    'Lambda',
    'S3',
    'CloudFront',
    'Route53',
    'ACM',
    'WAF',
    'SNS',
    'CloudWatch',
    'EventBridge',
    'CloudTrail',
    'Config',
]
- class ReportGenerator:
- """
- Generates Word reports from AWS scan results using templates.
-
- This class handles:
- - Loading Word templates from sample-reports folder
- - Parsing and replacing placeholders
- - Generating horizontal and vertical tables for different services
- - Embedding network diagrams
- - Updating table of contents
- """
-
- def __init__(self, template_path: str = None):
- """
- Initialize the report generator.
-
- Args:
- template_path: Path to the Word template file. If None, uses default template.
- """
- self.template_path = template_path
- self.document = None
- self._placeholder_pattern = re.compile(r'\[([^\]]+)\]')
-
def load_template(self, template_path: Optional[str] = None) -> "Document":
    """
    Load a Word template file and remember it as the current document.

    Args:
        template_path: Path to the template file. When empty, the path given
            to the constructor is used; when that is empty too, the default
            template from _get_default_template_path() is used.

    Returns:
        Loaded Document object (also stored on self.document)

    Raises:
        FileNotFoundError: If template file doesn't exist
        ValueError: If the file exists but cannot be opened as a document;
            the underlying error is attached as the cause
    """
    path = template_path or self.template_path
    if not path:
        # Use default template
        path = self._get_default_template_path()

    if not os.path.exists(path):
        raise FileNotFoundError(f"Template file not found: {path}")

    # Only the parsing step is guarded, so unrelated failures are not
    # mislabelled as template errors.
    try:
        self.document = Document(path)
    except Exception as e:
        raise ValueError(f"Failed to load template: {str(e)}") from e
    return self.document
-
- def _get_default_template_path(self) -> str:
- """Get the default template path from sample-reports folder."""
- # Look for the template with placeholders
- base_dir = os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(__file__))))
- sample_reports_dir = os.path.join(base_dir, 'sample-reports')
-
- # Prefer the template with [Client Name]-[Project Name] format
- template_name = '[Client Name]-[Project Name]-Project-Report-v1.0.docx'
- template_path = os.path.join(sample_reports_dir, template_name)
-
- if os.path.exists(template_path):
- return template_path
-
- # Fall back to any .docx file in sample-reports
- if os.path.exists(sample_reports_dir):
- for file in os.listdir(sample_reports_dir):
- if file.endswith('.docx'):
- return os.path.join(sample_reports_dir, file)
-
- raise FileNotFoundError("No template file found in sample-reports folder")
-
def find_placeholders(self) -> List[str]:
    """
    Find all placeholders in the document.

    Body paragraphs are scanned first, then the paragraphs of every cell of
    every body table, then each section's headers followed by its footers.

    Returns:
        List of distinct placeholder names (without brackets), in order of
        first appearance

    Raises:
        ValueError: If no document has been loaded
    """
    if not self.document:
        raise ValueError("No document loaded. Call load_template() first.")

    # Dict keys de-duplicate while keeping insertion order, so the result is
    # the same on every run (a set would not guarantee that).
    found = {}

    def scan(paragraphs) -> None:
        for paragraph in paragraphs:
            for name in self._placeholder_pattern.findall(paragraph.text):
                found.setdefault(name, None)

    # Search in paragraphs
    scan(self.document.paragraphs)

    # Search in tables
    for table in self.document.tables:
        for row in table.rows:
            for cell in row.cells:
                scan(cell.paragraphs)

    # Search in headers and footers
    for section in self.document.sections:
        parts = (
            section.header, section.first_page_header, section.even_page_header,
            section.footer, section.first_page_footer, section.even_page_footer,
        )
        for part in parts:
            if part:
                scan(part.paragraphs)

    return list(found)
-
def get_template_structure(self) -> Dict[str, Any]:
    """
    Summarise the loaded template.

    Returns:
        Dictionary with the number of sections, paragraphs and tables, the
        placeholders reported by find_placeholders(), and one
        ``{'level': style name, 'text': paragraph text}`` entry for every
        paragraph whose style name starts with "Heading"

    Raises:
        ValueError: If no document has been loaded
    """
    document = self.document
    if not document:
        raise ValueError("No document loaded. Call load_template() first.")

    summary = {
        'sections': len(document.sections),
        'paragraphs': len(document.paragraphs),
        'tables': len(document.tables),
        'placeholders': self.find_placeholders(),
        'headings': [],
    }

    # Paragraphs without a style are skipped before the name is inspected.
    summary['headings'].extend(
        {'level': paragraph.style.name, 'text': paragraph.text}
        for paragraph in document.paragraphs
        if paragraph.style and paragraph.style.name.startswith('Heading')
    )
    return summary
-
def replace_placeholders(self, replacements: Dict[str, str]) -> None:
    """
    Substitute placeholders throughout the loaded document.

    Paragraphs are visited in this order: document body, cells of body
    tables, then for each section its headers followed by its footers. Each
    paragraph is handed to _replace_in_paragraph().

    Args:
        replacements: Dictionary mapping placeholder names to values
                     e.g., {'Client Name': 'Acme Corp', 'Project Name': 'Cloud Migration'}

    Raises:
        ValueError: If no document has been loaded
    """
    document = self.document
    if not document:
        raise ValueError("No document loaded. Call load_template() first.")

    def paragraphs_in_visit_order():
        # Body paragraphs
        yield from document.paragraphs

        # Table cells
        for table in document.tables:
            for row in table.rows:
                for cell in row.cells:
                    yield from cell.paragraphs

        # Headers, then footers, section by section
        for section in document.sections:
            headers = [section.header, section.first_page_header, section.even_page_header]
            for header in headers:
                if header:
                    yield from header.paragraphs

            footers = [section.footer, section.first_page_footer, section.even_page_footer]
            for footer in footers:
                if footer:
                    yield from footer.paragraphs

    for paragraph in paragraphs_in_visit_order():
        self._replace_in_paragraph(paragraph, replacements)
-
- def _replace_in_paragraph(self, paragraph, replacements: Dict[str, str]) -> None:
- """
- Replace placeholders in a single paragraph while preserving formatting.
-
- Supports both bracketed placeholders like [Client Name] and
- unbracketed placeholders like YYYY. mm. DD.
-
- Args:
- paragraph: The paragraph to process
- replacements: Dictionary of placeholder replacements
- """
- if not paragraph.text:
- return
-
- # Check if paragraph contains any placeholders (bracketed or unbracketed)
- text = paragraph.text
- has_placeholder = False
- for placeholder in replacements.keys():
- # Check for bracketed placeholder [placeholder]
- if f'[{placeholder}]' in text:
- has_placeholder = True
- break
- # Check for unbracketed placeholder (for date formats like YYYY. mm. DD)
- if placeholder in text:
- has_placeholder = True
- break
-
- if not has_placeholder:
- return
-
- # Replace placeholders in the text
- new_text = text
- for placeholder, value in replacements.items():
- # First try bracketed replacement
- new_text = new_text.replace(f'[{placeholder}]', str(value) if value else '')
- # Then try unbracketed replacement (for date formats like YYYY. mm. DD)
- # Only replace patterns that start with YYYY to avoid replacing column names like "Date"
- if placeholder.startswith('YYYY'):
- new_text = new_text.replace(placeholder, str(value) if value else '')
-
- # If text changed, update the paragraph
- if new_text != text:
- # Try to preserve formatting by updating runs
- if len(paragraph.runs) == 1:
- paragraph.runs[0].text = new_text
- else:
- # For complex formatting, rebuild the paragraph
- # Store the first run's formatting
- if paragraph.runs:
- first_run = paragraph.runs[0]
- font_name = first_run.font.name
- font_size = first_run.font.size
- bold = first_run.font.bold
- italic = first_run.font.italic
-
- # Clear all runs
- for run in paragraph.runs:
- run.text = ''
-
- # Set new text on first run
- paragraph.runs[0].text = new_text
- else:
- # No runs, add new one
- paragraph.add_run(new_text)
-
def create_project_metadata_replacements(self, metadata: Dict[str, Any]) -> Dict[str, str]:
    """
    Build the placeholder-to-value mapping from project metadata.

    Args:
        metadata: Project metadata dictionary. Every field may be supplied
            under a camelCase or a snake_case key (camelCase wins when both
            hold a truthy value):
            - clientName/client_name, projectName/project_name
            - bdManager/bd_manager, bdManagerEmail/bd_manager_email
            - solutionsArchitect/solutions_architect,
              solutionsArchitectEmail/solutions_architect_email
            - cloudEngineer/cloud_engineer, cloudEngineerEmail/cloud_engineer_email

    Returns:
        Dictionary of placeholder replacements: client and project names,
        the three contact roles (each as "<Role>", "<Role> Name" and
        "<Role> Email"), several renderings of the current local date, and
        the fixed version strings
    """
    def lookup(camel_key: str, snake_key: str) -> str:
        # Missing or falsy values collapse to the empty string.
        return metadata.get(camel_key, '') or metadata.get(snake_key, '') or ''

    today = datetime.now()

    # Client and Project
    mapping = {
        'Client Name': lookup('clientName', 'client_name'),
        'Project Name': lookup('projectName', 'project_name'),
    }

    # Contact roles: placeholder label, camelCase key, snake_case key
    roles = (
        ('BD Manager', 'bdManager', 'bd_manager'),
        ('Solutions Architect', 'solutionsArchitect', 'solutions_architect'),
        ('Cloud Engineer', 'cloudEngineer', 'cloud_engineer'),
    )
    for label, camel_key, snake_key in roles:
        person = lookup(camel_key, snake_key)
        mapping[label] = person
        mapping[f'{label} Name'] = person
        mapping[f'{label} Email'] = lookup(f'{camel_key}Email', f'{snake_key}_email')

    # Date placeholders - multiple formats
    mapping['Date'] = today.strftime('%Y-%m-%d')
    mapping['YYYY. mm. DD'] = today.strftime('%Y. %m. %d')
    mapping['YYYY.mm.DD'] = today.strftime('%Y.%m.%d')
    mapping['YYYY-mm-DD'] = today.strftime('%Y-%m-%d')
    mapping['Month'] = today.strftime('%B')
    mapping['Year'] = str(today.year)
    mapping['Report Date'] = today.strftime('%B %d, %Y')

    # Version info
    mapping['Version'] = '1.0'
    mapping['Document Version'] = '1.0'

    return mapping
-
def add_horizontal_table(self, service_key: str, resources: List[Dict[str, Any]],
                         include_account_column: bool = False) -> None:
    """
    Append a horizontal table for a service: one merged title row, one row
    of column headers, then one row per resource, followed by an empty
    spacing paragraph.

    Format:
    | Service Name (merged across all columns) |
    | Column1 | Column2 | Column3 |
    | Value1  | Value2  | Value3  |

    Args:
        service_key: The service key from SERVICE_CONFIG
        resources: List of resource dictionaries
        include_account_column: Whether to prepend an "AWS Account" column
            (for multi-account reports)

    Raises:
        ValueError: If no document is loaded, the service key is unknown, or
            the service is not configured with the horizontal layout
    """
    if not self.document:
        raise ValueError("No document loaded. Call load_template() first.")
    if service_key not in SERVICE_CONFIG:
        raise ValueError(f"Unknown service: {service_key}")

    service = SERVICE_CONFIG[service_key]
    if service['layout'] != TableLayout.HORIZONTAL:
        raise ValueError(f"Service {service_key} uses vertical layout, not horizontal")

    # Copy the configured columns so the shared configuration is not mutated.
    headers = list(service['columns'])
    if include_account_column and 'AWS Account' not in headers:
        headers.insert(0, 'AWS Account')

    # Two leading rows (title + column headers) precede the data rows.
    leading_rows = 2
    column_count = len(headers)
    table = self.document.add_table(rows=len(resources) + leading_rows, cols=column_count)
    self._copy_table_style_from_template(table)

    # Title row: merge every cell into the first one, then label and centre it.
    # The row's cells are looked up afresh on each pass rather than cached.
    title_row = table.rows[0]
    banner = title_row.cells[0]
    for position in range(1, column_count):
        banner.merge(title_row.cells[position])
    banner.text = service['title']
    self._apply_header_cell_style(banner, is_title=True)
    for paragraph in banner.paragraphs:
        paragraph.alignment = WD_ALIGN_PARAGRAPH.CENTER

    # Column header row
    header_row = table.rows[1]
    for position, label in enumerate(headers):
        header_cell = header_row.cells[position]
        header_cell.text = label
        self._apply_header_cell_style(header_cell)

    # One data row per resource
    for row_number, resource in enumerate(resources, start=leading_rows):
        data_row = table.rows[row_number]
        for position, label in enumerate(headers):
            data_cell = data_row.cells[position]
            data_cell.text = self._get_resource_value(resource, label)

    # Spacing after the table
    self.document.add_paragraph()
-
def add_vertical_table(self, service_key: str, resource: Dict[str, Any],
                       include_account_column: bool = False,
                       show_title: bool = True) -> None:
    """
    Append a two-column table describing one resource: attribute names on
    the left, their values on the right, followed by an empty spacing
    paragraph.

    Format:
    | Service Name (merged across 2 columns) |
    | Column1 | Value1 |
    | Column2 | Value2 |

    Args:
        service_key: The service key from SERVICE_CONFIG
        resource: Single resource dictionary
        include_account_column: Whether to prepend an "AWS Account" row
            (for multi-account reports)
        show_title: Whether to emit the merged service title row

    Raises:
        ValueError: If no document is loaded, the service key is unknown, or
            the service is not configured with the vertical layout
    """
    if not self.document:
        raise ValueError("No document loaded. Call load_template() first.")
    if service_key not in SERVICE_CONFIG:
        raise ValueError(f"Unknown service: {service_key}")

    service = SERVICE_CONFIG[service_key]
    if service['layout'] != TableLayout.VERTICAL:
        raise ValueError(f"Service {service_key} uses horizontal layout, not vertical")

    # Copy the configured attributes so the shared configuration is not mutated.
    attributes = list(service['columns'])
    if include_account_column and 'AWS Account' not in attributes:
        attributes.insert(0, 'AWS Account')

    # The optional title occupies row 0 and pushes the attributes down by one.
    first_attribute_row = 1 if show_title else 0
    table = self.document.add_table(rows=len(attributes) + first_attribute_row, cols=2)
    self._copy_table_style_from_template(table)

    if show_title:
        title_row = table.rows[0]
        banner = title_row.cells[0]
        banner.merge(title_row.cells[1])
        banner.text = service['title']
        self._apply_header_cell_style(banner, is_title=True)
        for paragraph in banner.paragraphs:
            paragraph.alignment = WD_ALIGN_PARAGRAPH.CENTER

    for row_number, label in enumerate(attributes, start=first_attribute_row):
        table_row = table.rows[row_number]

        # Left cell: attribute name, styled like a header
        label_cell = table_row.cells[0]
        label_cell.text = label
        self._apply_header_cell_style(label_cell)

        # Right cell: the resource's value for that attribute
        content_cell = table_row.cells[1]
        content_cell.text = self._get_resource_value(resource, label)

    # Spacing after the table
    self.document.add_paragraph()
-
def add_vertical_tables_for_service(self, service_key: str, resources: List[Dict[str, Any]],
                                    include_account_column: bool = False) -> None:
    """
    Append one vertical table per resource of a service.

    Every table is produced by add_vertical_table() with its title row
    enabled, so each resource's table carries the service title.

    Args:
        service_key: The service key from SERVICE_CONFIG
        resources: List of resource dictionaries
        include_account_column: Whether to include AWS Account row

    Raises:
        ValueError: If no document is loaded or the service key is unknown
    """
    document_loaded = bool(self.document)
    if not document_loaded:
        raise ValueError("No document loaded. Call load_template() first.")

    known_service = service_key in SERVICE_CONFIG
    if not known_service:
        raise ValueError(f"Unknown service: {service_key}")

    for single_resource in resources:
        self.add_vertical_table(
            service_key,
            single_resource,
            include_account_column,
            show_title=True,
        )
-
- def _insert_element_at_position(self, element) -> None:
- """
- Insert an element at the tracked position within Implementation List section.
-
- Args:
- element: The XML element to insert
- """
- if self._insert_parent is not None and self._insert_index is not None:
- self._insert_parent.insert(self._insert_index, element)
- self._insert_index += 1
- else:
- # Fallback: append to document body
- self.document._body._body.append(element)
-
def _add_horizontal_table_at_position(self, service_key: str, resources: List[Dict[str, Any]],
                                      include_account_column: bool = False) -> None:
    """
    Build a horizontal table and place it at the tracked position within the
    Implementation List section.

    The table has a merged title row, a column header row and one styled
    data row per resource; a spacing paragraph is added after it.

    Args:
        service_key: The service key from SERVICE_CONFIG
        resources: List of resource dictionaries
        include_account_column: Whether to include AWS Account column

    Raises:
        ValueError: If the service key is unknown
    """
    if service_key not in SERVICE_CONFIG:
        raise ValueError(f"Unknown service: {service_key}")

    service = SERVICE_CONFIG[service_key]
    # Copy the configured columns so the shared configuration is not mutated.
    headers = list(service['columns'])
    if include_account_column and 'AWS Account' not in headers:
        headers.insert(0, 'AWS Account')

    # Two leading rows (title + column headers) precede the data rows.
    leading_rows = 2
    column_count = len(headers)
    table = self.document.add_table(rows=len(resources) + leading_rows, cols=column_count)

    # Detach the new table from wherever add_table() placed it and re-insert
    # it at the tracked position.
    xml_table = table._tbl
    xml_table.getparent().remove(xml_table)
    self._insert_element_at_position(xml_table)

    self._copy_table_style_from_template(table)

    # Title row: merge every cell into the first one, then label and centre it.
    # The row's cells are looked up afresh on each pass rather than cached.
    title_row = table.rows[0]
    banner = title_row.cells[0]
    for position in range(1, column_count):
        banner.merge(title_row.cells[position])
    banner.text = service['title']
    self._apply_header_cell_style(banner, is_title=True)
    for paragraph in banner.paragraphs:
        paragraph.alignment = WD_ALIGN_PARAGRAPH.CENTER

    # Column header row
    header_row = table.rows[1]
    for position, label in enumerate(headers):
        header_cell = header_row.cells[position]
        header_cell.text = label
        self._apply_header_cell_style(header_cell)

    # One data row per resource, each cell given the data-cell style
    for row_number, resource in enumerate(resources, start=leading_rows):
        data_row = table.rows[row_number]
        for position, label in enumerate(headers):
            data_cell = data_row.cells[position]
            data_cell.text = self._get_resource_value(resource, label)
            self._apply_data_cell_style(data_cell)

    # Spacing paragraph after the table
    self._add_spacing_paragraph_at_position()
-
def _add_vertical_tables_at_position(self, service_key: str, resources: List[Dict[str, Any]],
                                     include_account_column: bool = False) -> None:
    """
    Render one two-column (attribute / value) table per resource at the
    tracked insert position.

    Each table starts with a merged title row carrying the service title,
    followed by one row per configured column. An empty spacing paragraph is
    inserted after every table.

    Args:
        service_key: The service key from SERVICE_CONFIG
        resources: List of resource dictionaries, one table each
        include_account_column: Whether to prepend an 'AWS Account' row

    Raises:
        ValueError: If service_key is not present in SERVICE_CONFIG
    """
    if service_key not in SERVICE_CONFIG:
        raise ValueError(f"Unknown service: {service_key}")

    service_cfg = SERVICE_CONFIG[service_key]
    # Work on a copy so inserting the account row never touches the config
    attribute_names = list(service_cfg['columns'])
    if include_account_column and 'AWS Account' not in attribute_names:
        attribute_names.insert(0, 'AWS Account')

    # One title row + one row per attribute
    row_count = len(attribute_names) + 1

    for resource in resources:
        table = self.document.add_table(rows=row_count, cols=2)

        # Detach the freshly created table and re-insert it at the tracked position
        tbl_xml = table._tbl
        tbl_xml.getparent().remove(tbl_xml)
        self._insert_element_at_position(tbl_xml)

        self._copy_table_style_from_template(table)

        # Title row: merge both cells, then fill and centre
        title_row = table.rows[0]
        title_cell = title_row.cells[0]
        title_cell.merge(title_row.cells[1])
        title_cell.text = service_cfg['title']
        self._apply_header_cell_style(title_cell, is_title=True)
        for para in title_cell.paragraphs:
            para.alignment = WD_ALIGN_PARAGRAPH.CENTER

        # Attribute rows begin at table row index 1
        for row_number, attribute in enumerate(attribute_names, start=1):
            row = table.rows[row_number]

            label_cell = row.cells[0]
            label_cell.text = attribute
            self._apply_header_cell_style(label_cell)

            value_cell = row.cells[1]
            value_cell.text = self._get_resource_value(resource, attribute)
            self._apply_data_cell_style(value_cell)

        # Empty paragraph after each table for spacing
        self._add_spacing_paragraph_at_position()
-
def _add_spacing_paragraph_at_position(self) -> None:
    """Insert an empty paragraph at the tracked position to separate content."""
    spacer = self.document.add_paragraph()._element
    # Detach the new paragraph, then place it at the tracked position
    spacer.getparent().remove(spacer)
    self._insert_element_at_position(spacer)
-
- def _get_resource_value(self, resource: Dict[str, Any], column_name: str) -> str:
- """
- Get value from resource for a given column name.
-
- Handles both flat dictionaries and ResourceData.to_dict() format
- where attributes are nested in 'attributes' key.
- Empty values are replaced with '-'.
-
- Args:
- resource: Resource dictionary
- column_name: Column display name
-
- Returns:
- Value as string, or '-' if empty
- """
- value = None
-
- # First try to get from attributes (ResourceData format)
- attributes = resource.get('attributes', {})
- if column_name in attributes:
- value = attributes[column_name]
-
- # Try mapped attribute key in attributes
- if value is None:
- attr_key = self._column_to_attribute(column_name)
- if attr_key in attributes:
- value = attributes[attr_key]
-
- # Fallback: try direct access on resource (flat dict format)
- if value is None and column_name in resource:
- value = resource[column_name]
-
- if value is None:
- attr_key = self._column_to_attribute(column_name)
- if attr_key in resource:
- value = resource[attr_key]
-
- # Convert to string and handle empty values
- if value is None or value == '' or (isinstance(value, str) and value.strip() == ''):
- return '-'
-
- return str(value)
-
- def _column_to_attribute(self, column_name: str) -> str:
- """
- Convert column display name to attribute key.
-
- Args:
- column_name: Display name of the column
-
- Returns:
- Attribute key for the resource dictionary
- """
- # Common mappings
- mappings = {
- 'Name': 'name',
- 'ID': 'id',
- 'Region': 'region',
- 'AZ': 'availability_zone',
- 'CIDR': 'cidr_block',
- 'VPC': 'vpc_id',
- 'VPC ID': 'vpc_id',
- 'Subnet ID': 'subnet_id',
- 'Instance ID': 'instance_id',
- 'Instance Type': 'instance_type',
- 'AMI': 'ami_id',
- 'Public IP': 'public_ip',
- 'Public DNS': 'public_dns',
- 'Private IP': 'private_ip',
- 'Elastic IP': 'elastic_ip',
- 'Key': 'key_name',
- 'Security Groups': 'security_groups',
- 'EBS Type': 'ebs_type',
- 'EBS Size': 'ebs_size',
- 'Encryption': 'encryption',
- 'AWS Account': 'account_id',
- 'Subnet Associations': 'subnet_associations',
- 'Peering Connection ID': 'peering_connection_id',
- 'Requester VPC': 'requester_vpc',
- 'Accepter VPC': 'accepter_vpc',
- 'Customer Gateway ID': 'customer_gateway_id',
- 'IP Address': 'ip_address',
- 'Virtual Private Gateway ID': 'virtual_private_gateway_id',
- 'VPN ID': 'vpn_id',
- 'Routes': 'routes',
- 'Service Name': 'service_name',
- 'Type': 'type',
- 'Launch Template': 'launch_template',
- 'Target Groups': 'target_groups',
- 'Desired': 'desired_capacity',
- 'Min': 'min_size',
- 'Max': 'max_size',
- 'Scaling Policy': 'scaling_policy',
- 'DNS': 'dns_name',
- 'Scheme': 'scheme',
- 'Availability Zones': 'availability_zones',
- 'Load Balancer': 'load_balancer',
- 'TG Name': 'target_group_name',
- 'Port': 'port',
- 'Protocol': 'protocol',
- 'Registered Instances': 'registered_instances',
- 'Health Check Path': 'health_check_path',
- 'Endpoint': 'endpoint',
- 'DB instance ID': 'db_instance_id',
- 'DB name': 'db_name',
- 'Master Username': 'master_username',
- 'DB Engine': 'engine',
- 'DB Version': 'engine_version',
- 'Storage type': 'storage_type',
- 'Storage': 'storage',
- 'Multi-AZ': 'multi_az',
- 'Deletion Protection': 'deletion_protection',
- 'Performance Insights Enabled': 'performance_insights',
- 'CloudWatch Logs': 'cloudwatch_logs',
- 'Cluster ID': 'cluster_id',
- 'Engine': 'engine',
- 'Engine Version': 'engine_version',
- 'Node Type': 'node_type',
- 'Num Nodes': 'num_nodes',
- 'Status': 'status',
- 'Cluster Name': 'cluster_name',
- 'Version': 'version',
- 'Function Name': 'function_name',
- 'Runtime': 'runtime',
- 'Memory (MB)': 'memory_size',
- 'Timeout (s)': 'timeout',
- 'Last Modified': 'last_modified',
- 'Bucket Name': 'bucket_name',
- 'Bucket': 'bucket',
- 'Event Type': 'event_type',
- 'Destination type': 'destination_type',
- 'Destination': 'destination',
- 'CloudFront ID': 'cloudfront_id',
- 'Domain Name': 'domain_name',
- 'CNAME': 'cname',
- 'Origin Domain Name': 'origin_domain_name',
- 'Origin Protocol Policy': 'origin_protocol_policy',
- 'Viewer Protocol Policy': 'viewer_protocol_policy',
- 'Allowed HTTP Methods': 'allowed_http_methods',
- 'Cached HTTP Methods': 'cached_http_methods',
- 'Zone ID': 'zone_id',
- 'Record Count': 'record_count',
- 'Domain name': 'domain_name',
- 'Additional names': 'additional_names',
- 'WebACL Name': 'webacl_name',
- 'Scope': 'scope',
- 'Rules Count': 'rules_count',
- 'Associated Resources': 'associated_resources',
- 'Topic Name': 'topic_name',
- 'Topic Display Name': 'display_name',
- 'Subscription Protocol': 'subscription_protocol',
- 'Subscription Endpoint': 'subscription_endpoint',
- 'Log Group Name': 'log_group_name',
- 'Retention Days': 'retention_days',
- 'Stored Bytes': 'stored_bytes',
- 'KMS Encryption': 'kms_encryption',
- 'Description': 'description',
- 'Event Bus': 'event_bus',
- 'State': 'state',
- 'Multi-Region Trail': 'multi_region',
- 'Log File Validation': 'log_file_validation',
- 'Regional Resources': 'regional_resources',
- 'Global Resources': 'global_resources',
- 'Retention period': 'retention_period',
- 'Port range': 'port_range',
- 'Source': 'source',
- 'Other Requirement': 'other_requirement',
- }
-
- return mappings.get(column_name, column_name.lower().replace(' ', '_'))
-
- def _find_implementation_list_section(self) -> Optional[int]:
- """
- Find the index of the 'Implementation List' section in the document.
-
- Returns:
- Index of the paragraph after the Implementation List heading, or None if not found
- """
- for i, paragraph in enumerate(self.document.paragraphs):
- text = paragraph.text.strip().lower()
- # Match variations like "4. Implementation List", "Implementation List", etc.
- if 'implementation list' in text:
- return i
- return None
-
def _copy_table_style_from_template(self, table) -> None:
    """
    Apply the report's standard table formatting.

    Tries to assign the 'Table Grid' style, then sets the table width to 100%
    and single black borders (top, left, bottom, right, insideH, insideV)
    directly on the table properties.

    Any existing ``w:tblW`` / ``w:tblBorders`` child is replaced rather than
    duplicated, and the new element is placed ahead of the property elements
    that must follow it, so the table properties never end up with two
    competing width or border definitions.

    Args:
        table: The table to style
    """
    try:
        table.style = 'Table Grid'
    except Exception:
        # Best effort: if the style cannot be assigned, the explicit borders
        # set below still give the table a grid appearance.
        pass

    tbl = table._tbl
    tblPr = tbl.find(qn('w:tblPr'))
    if tblPr is None:
        tblPr = OxmlElement('w:tblPr')
        tbl.insert(0, tblPr)

    def put_property(new_child, later_siblings) -> None:
        """Replace any same-tag child of tblPr and keep child order valid."""
        for stale in tblPr.findall(new_child.tag):
            tblPr.remove(stale)
        successor_tags = {qn(name) for name in later_siblings}
        for sibling in tblPr:
            if sibling.tag in successor_tags:
                sibling.addprevious(new_child)
                return
        tblPr.append(new_child)

    # Elements that come after w:tblBorders / w:tblW inside w:tblPr
    after_borders = ('w:shd', 'w:tblLayout', 'w:tblCellMar', 'w:tblLook',
                     'w:tblCaption', 'w:tblDescription', 'w:tblPrChange')
    after_width = ('w:jc', 'w:tblCellSpacing', 'w:tblInd', 'w:tblBorders') + after_borders

    # Table width: 5000 with type 'pct' (fiftieths of a percent) = 100%
    tblW = OxmlElement('w:tblW')
    tblW.set(qn('w:w'), '5000')
    tblW.set(qn('w:type'), 'pct')
    put_property(tblW, after_width)

    # Single black border on every outer and inner edge
    tblBorders = OxmlElement('w:tblBorders')
    for border_name in ['top', 'left', 'bottom', 'right', 'insideH', 'insideV']:
        border = OxmlElement(f'w:{border_name}')
        border.set(qn('w:val'), 'single')
        border.set(qn('w:sz'), '4')
        border.set(qn('w:space'), '0')
        border.set(qn('w:color'), '000000')
        tblBorders.append(border)
    put_property(tblBorders, after_borders)
-
def _apply_header_cell_style(self, cell, is_title: bool = False) -> None:
    """
    Style a header cell: shaded background, bold text, standard font and spacing.

    Args:
        cell: The cell to style
        is_title: True for the service-title cell (fill DAEEF3, 12pt font);
            False for column/attribute header cells (fill C6D9F1, 11pt font)
    """
    if is_title:
        fill_color, font_size = 'DAEEF3', 12
    else:
        fill_color, font_size = 'C6D9F1', 11

    # Background shading element for the cell
    shading = OxmlElement('w:shd')
    for attr_name, attr_value in (('w:val', 'clear'), ('w:color', 'auto'), ('w:fill', fill_color)):
        shading.set(qn(attr_name), attr_value)
    cell._tc.get_or_add_tcPr().append(shading)

    # Standard paragraph/font formatting first, then make every run bold
    for paragraph in cell.paragraphs:
        self._apply_cell_paragraph_format(paragraph, font_size=font_size)
        for run in paragraph.runs:
            run.font.bold = True
-
def _apply_cell_paragraph_format(self, paragraph, font_size: int = 11) -> None:
    """
    Apply the standard cell paragraph formatting.

    - Spacing: 3pt before, 3pt after, single line spacing
    - Every run: Calibri (also for the East Asian font slot) at ``font_size``

    Args:
        paragraph: The paragraph to format
        font_size: Font size in points (default 11)
    """
    from docx.shared import Pt
    from docx.enum.text import WD_LINE_SPACING

    spacing = paragraph.paragraph_format
    spacing.space_before = Pt(3)
    spacing.space_after = Pt(3)
    spacing.line_spacing_rule = WD_LINE_SPACING.SINGLE

    for run in paragraph.runs:
        font = run.font
        font.name = 'Calibri'
        font.size = Pt(font_size)
        # East Asian font slot is set directly on the run's rFonts element
        run._element.rPr.rFonts.set(qn('w:eastAsia'), 'Calibri')
-
- def _apply_data_cell_style(self, cell) -> None:
- """
- Apply data cell styling (font 11pt, spacing, no background).
-
- Args:
- cell: The cell to style
- """
- for paragraph in cell.paragraphs:
- self._apply_cell_paragraph_format(paragraph, font_size=11)
-
def add_service_tables(self, scan_results: Dict[str, List[Dict[str, Any]]],
                       include_account_column: bool = False,
                       regions: Optional[List[str]] = None) -> None:
    """
    Add tables for all services with resources, skipping empty services.

    Content is inserted into the existing 'Implementation List' section of the
    template, replacing whatever that section held. If no such section is
    found, a new 'Implementation List' Heading 1 is added and the content
    follows it.

    Services are grouped under their parent service heading (SERVICE_GROUPS,
    falling back to the service's own title). When more than one region is
    given, regional services are emitted per region with a
    "<group> (<region>)" heading, followed by the global services once without
    a region suffix. Otherwise services are emitted in SERVICE_ORDER; global
    services are de-duplicated, and the ACM heading gets a "(us-east-1)"
    suffix when exactly one region other than us-east-1 was selected.

    A page break is inserted after the generated content.

    Args:
        scan_results: Dictionary mapping service keys to lists of resources
        include_account_column: Whether to include AWS Account column
        regions: List of regions being scanned (for multi-region heading display)

    Raises:
        ValueError: If no document is loaded
    """
    if not self.document:
        raise ValueError("No document loaded. Call load_template() first.")

    # Locate (or create) the section heading that generated content follows
    impl_list_idx = self._find_implementation_list_section()
    if impl_list_idx is not None:
        # Drop placeholder content up to the next Heading 1
        self._clear_section_content(impl_list_idx)
        anchor = self.document.paragraphs[impl_list_idx]._element
    else:
        anchor = self.document.add_paragraph('Implementation List', style='Heading 1')._element

    # The insert position is always taken relative to the heading element.
    # Using the parent's child count instead would place content after any
    # trailing element of the body (e.g. w:sectPr).
    parent = anchor.getparent()
    self._insert_parent = parent
    self._insert_index = list(parent).index(anchor) + 1

    def field_of(resource: Any, key: str) -> Any:
        """Read key from the resource itself, else from its 'attributes' dict."""
        if not isinstance(resource, dict):
            return None
        return resource.get(key) or (resource.get('attributes') or {}).get(key)

    def deduplicate_resources(resources: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
        """Drop resources whose id (or, lacking one, name) was already seen."""
        seen_ids = set()
        unique_resources = []
        for resource in resources:
            res_id = field_of(resource, 'id') or field_of(resource, 'name')
            if res_id:
                if res_id in seen_ids:
                    continue
                seen_ids.add(res_id)
            # Resources without any identifier are always kept
            unique_resources.append(resource)
        return unique_resources

    def sort_resources_by_name(resources: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
        """Sort by account id, then named resources (case-insensitive) before unnamed ones."""
        def get_sort_key(resource: Dict[str, Any]) -> tuple:
            account_id = str(field_of(resource, 'account_id') or '')
            name = field_of(resource, 'name')
            if name and str(name).strip():
                return (account_id, 0, str(name).lower())
            return (account_id, 1, '')
        return sorted(resources, key=get_sort_key)

    def add_service_table(service_key: str, resources: List[Dict[str, Any]]) -> None:
        """Emit the table(s) for one service using its configured layout."""
        config = SERVICE_CONFIG.get(service_key)
        if not config or not resources:
            return
        sorted_resources = sort_resources_by_name(resources)
        if config['layout'] == TableLayout.HORIZONTAL:
            self._add_horizontal_table_at_position(service_key, sorted_resources, include_account_column)
        else:
            self._add_vertical_tables_at_position(service_key, sorted_resources, include_account_column)

    def ensure_group_heading(seen_groups: set, group: str, heading_text: str) -> None:
        """Add the Heading 2 for a service group the first time it is needed."""
        if group not in seen_groups:
            self._add_heading2_at_position(heading_text)
            seen_groups.add(group)

    multi_region = bool(regions) and len(regions) > 1

    if multi_region:
        # Step 1: collect the regions actually present on regional resources
        found_regions = set()
        for service_key in SERVICE_ORDER:
            if service_key in GLOBAL_SERVICES:
                continue
            for resource in scan_results.get(service_key) or []:
                region = field_of(resource, 'region')
                if region:
                    found_regions.add(region)

        # Caller-supplied order first, then any extra regions alphabetically
        ordered_regions = [r for r in regions if r in found_regions]
        ordered_regions += sorted(found_regions.difference(ordered_regions))

        # Step 2: regional services, region by region
        # NOTE(review): regional resources that carry no region value match
        # none of these regions and are therefore not rendered in this mode.
        for region in ordered_regions:
            groups_in_region = set()
            for service_key in SERVICE_ORDER:
                if service_key in GLOBAL_SERVICES:
                    continue
                config = SERVICE_CONFIG.get(service_key)
                if not config:
                    continue
                region_resources = [
                    resource
                    for resource in scan_results.get(service_key) or []
                    if field_of(resource, 'region') == region
                ]
                if not region_resources:
                    continue
                service_group = SERVICE_GROUPS.get(service_key, config['title'])
                ensure_group_heading(groups_in_region, service_group, f"{service_group} ({region})")
                add_service_table(service_key, region_resources)

        # Step 3: global services once, without region suffix
        global_groups = set()
        for service_key in SERVICE_ORDER:
            if service_key not in GLOBAL_SERVICES:
                continue
            resources = scan_results.get(service_key, [])
            config = SERVICE_CONFIG.get(service_key)
            if not resources or not config:
                continue
            unique_resources = deduplicate_resources(resources)
            if not unique_resources:
                continue
            service_group = SERVICE_GROUPS.get(service_key, config['title'])
            ensure_group_heading(global_groups, service_group, service_group)
            add_service_table(service_key, unique_resources)
    else:
        # Single region or no region info
        acm_needs_region_suffix = (
            bool(regions) and
            len(regions) == 1 and
            'us-east-1' not in regions
        )

        added_groups = set()
        for service_key in SERVICE_ORDER:
            resources = scan_results.get(service_key, [])
            config = SERVICE_CONFIG.get(service_key)
            if not resources or not config:
                continue

            if service_key in GLOBAL_SERVICES:
                resources = deduplicate_resources(resources)
                if not resources:
                    continue

            service_group = SERVICE_GROUPS.get(service_key, config['title'])
            if service_key == 'acm' and acm_needs_region_suffix:
                display_group = f"{service_group} (us-east-1)"
            else:
                display_group = service_group

            ensure_group_heading(added_groups, service_group, display_group)
            add_service_table(service_key, resources)

    # Page break after the Implementation List section
    self._add_page_break_at_position()
-
def _add_page_break_at_position(self) -> None:
    """Insert a paragraph holding a page break at the tracked position."""
    from docx.oxml import OxmlElement
    from docx.oxml.ns import qn

    # New paragraph with a single run that carries the page-break element
    holder = self.document.add_paragraph()
    break_run = holder.add_run()
    page_break = OxmlElement('w:br')
    page_break.set(qn('w:type'), 'page')
    break_run._r.append(page_break)

    # Detach the paragraph and re-insert it at the tracked position
    holder_xml = holder._element
    holder_xml.getparent().remove(holder_xml)
    self._insert_element_at_position(holder_xml)
-
def _add_heading2_at_position(self, title: str) -> None:
    """
    Insert a 'Heading 2' paragraph at the tracked position.

    The heading text is the title prefixed with a '▼ ' marker.

    Args:
        title: The heading title (service group name)
    """
    heading_xml = self.document.add_paragraph(f'▼ {title}', style='Heading 2')._element
    # Detach the new heading, then place it at the tracked position
    heading_xml.getparent().remove(heading_xml)
    self._insert_element_at_position(heading_xml)
-
- def _clear_section_content(self, section_start_idx: int) -> None:
- """
- Clear content between a section heading and the next Heading 1.
-
- Args:
- section_start_idx: Index of the section heading paragraph
- """
- # Find elements to remove (between this Heading 1 and next Heading 1)
- elements_to_remove = []
- body = self.document._body._body
-
- start_para = self.document.paragraphs[section_start_idx]
- start_element = start_para._element
-
- # Find the position of start element in body
- body_children = list(body)
- try:
- start_pos = body_children.index(start_element)
- except ValueError:
- return
-
- # Iterate through elements after the heading
- for i in range(start_pos + 1, len(body_children)):
- elem = body_children[i]
-
- # Check if this is a Heading 1 paragraph (next section)
- if elem.tag.endswith('}p'):
- # Check if it's a Heading 1
- pStyle = elem.find('.//{http://schemas.openxmlformats.org/wordprocessingml/2006/main}pStyle')
- if pStyle is not None:
- style_val = pStyle.get('{http://schemas.openxmlformats.org/wordprocessingml/2006/main}val')
- if style_val and ('Heading1' in style_val or style_val == '1'):
- break
-
- elements_to_remove.append(elem)
-
- # Remove the elements
- for elem in elements_to_remove:
- body.remove(elem)
-
def filter_empty_services(self, scan_results: Dict[str, List[Dict[str, Any]]]) -> Dict[str, List[Dict[str, Any]]]:
    """
    Return a copy of the scan results without services that have no resources.

    Args:
        scan_results: Dictionary mapping service keys to lists of resources

    Returns:
        New dictionary holding only the entries whose value is truthy
    """
    non_empty = {}
    for service_key, resources in scan_results.items():
        if resources:
            non_empty[service_key] = resources
    return non_empty
-
def get_services_with_resources(self, scan_results: Dict[str, List[Dict[str, Any]]]) -> List[str]:
    """
    List the service keys that have at least one resource.

    Only keys present in SERVICE_ORDER are considered, and the result follows
    SERVICE_ORDER's ordering.

    Args:
        scan_results: Dictionary mapping service keys to lists of resources

    Returns:
        List of service keys with resources
    """
    populated = []
    for service_key in SERVICE_ORDER:
        if scan_results.get(service_key):
            populated.append(service_key)
    return populated
-
def replace_architecture_picture_placeholder(self, image_path: str, width_inches: float = 6.0) -> bool:
    """
    Replace the first '[AWS Architecture Picture]' placeholder with an image.

    Body paragraphs are searched first; table cell paragraphs are searched
    only if no body paragraph contains the placeholder. In the matching
    paragraph all existing runs are emptied, the picture is added in a new
    run, any text that surrounded the placeholder is re-added in a run after
    the picture, and the paragraph is centred.

    Args:
        image_path: Path to the architecture diagram image file
        width_inches: Width of the image in inches (default 6.0)

    Returns:
        True if placeholder was found and replaced, False otherwise

    Raises:
        FileNotFoundError: If image file doesn't exist
        ValueError: If no document is loaded
    """
    if not self.document:
        raise ValueError("No document loaded. Call load_template() first.")

    if not os.path.exists(image_path):
        raise FileNotFoundError(f"Image file not found: {image_path}")

    placeholder_text = '[AWS Architecture Picture]'

    def paragraphs_in_search_order():
        """Yield body paragraphs first, then the paragraphs of every table cell."""
        yield from self.document.paragraphs
        for table in self.document.tables:
            for row in table.rows:
                for cell in row.cells:
                    yield from cell.paragraphs

    for paragraph in paragraphs_in_search_order():
        if placeholder_text not in paragraph.text:
            continue

        # Text around the placeholder, captured before the runs are emptied
        remaining_text = paragraph.text.replace(placeholder_text, '')

        for run in paragraph.runs:
            run.text = ''

        picture_run = paragraph.add_run()
        picture_run.add_picture(image_path, width=Inches(width_inches))

        if remaining_text.strip():
            paragraph.add_run(remaining_text)

        paragraph.alignment = WD_ALIGN_PARAGRAPH.CENTER
        # Only the first occurrence is replaced
        return True

    return False
-
def clear_architecture_picture_placeholder(self) -> bool:
    """
    Remove every '[AWS Architecture Picture]' placeholder from the document.

    This method is called when no architecture image is provided, to clean up
    the placeholder text. Body paragraphs and table cell paragraphs are both
    processed.

    The placeholder is removed run by run when a single run contains it. When
    the placeholder is split across several runs, the runs' text is joined,
    the placeholder is removed from the joined text, the result is written to
    the first run and the remaining runs are emptied.

    Returns:
        True if placeholder was found and removed, False otherwise

    Raises:
        ValueError: If no document is loaded
    """
    if not self.document:
        raise ValueError("No document loaded. Call load_template() first.")

    placeholder_text = '[AWS Architecture Picture]'

    def strip_placeholder(paragraph) -> bool:
        """Remove the placeholder from one paragraph; report whether text changed."""
        if placeholder_text not in paragraph.text:
            return False

        runs = list(paragraph.runs)
        holders = [run for run in runs if placeholder_text in run.text]
        if holders:
            for run in holders:
                run.text = run.text.replace(placeholder_text, '')
            return True

        # No single run holds the whole placeholder: it may be split across runs
        combined = ''.join(run.text for run in runs)
        if placeholder_text not in combined:
            # The placeholder is not reachable through this paragraph's runs
            return False
        runs[0].text = combined.replace(placeholder_text, '')
        for run in runs[1:]:
            run.text = ''
        return True

    placeholder_found = False

    # Search in paragraphs
    for paragraph in self.document.paragraphs:
        if strip_placeholder(paragraph):
            placeholder_found = True

    # Also search in tables
    for table in self.document.tables:
        for row in table.rows:
            for cell in row.cells:
                for paragraph in cell.paragraphs:
                    if strip_placeholder(paragraph):
                        placeholder_found = True

    return placeholder_found
def embed_network_diagram(self, image_path: str, width_inches: float = 6.0) -> None:
    """
    Embed a network diagram image into the document.

    If no paragraph mentions 'Network Diagram' or 'Network Architecture', a
    'Network Diagram' Heading 1 is added first. The picture is then added via
    ``document.add_picture()``, the document's last paragraph is centred, and
    an empty paragraph is added for spacing.

    NOTE(review): when a matching paragraph exists, its position is not used
    to place the picture; confirm whether the picture is meant to follow it.

    Args:
        image_path: Path to the image file
        width_inches: Width of the image in inches

    Raises:
        ValueError: If no document is loaded
        FileNotFoundError: If image file doesn't exist
    """
    if not self.document:
        raise ValueError("No document loaded. Call load_template() first.")

    if not os.path.exists(image_path):
        raise FileNotFoundError(f"Image file not found: {image_path}")

    has_network_section = any(
        'Network Diagram' in paragraph.text or 'Network Architecture' in paragraph.text
        for paragraph in self.document.paragraphs
    )
    if not has_network_section:
        # No existing section: create one for the diagram
        self.document.add_paragraph('Network Diagram', style='Heading 1')

    self.document.add_picture(image_path, width=Inches(width_inches))

    # Centre the paragraph that now ends the document
    self.document.paragraphs[-1].alignment = WD_ALIGN_PARAGRAPH.CENTER

    # Spacing paragraph
    self.document.add_paragraph()
-
def update_table_of_contents(self) -> None:
    """
    Hand the document's table-of-contents paragraph to the TOC update hook.

    Scans the paragraphs in order and passes the first one that has a run
    containing 'TOC' or 'Table of Contents' to ``_mark_toc_for_update()``,
    then stops. Nothing happens if no such paragraph exists.

    Note: python-docx cannot fully update a TOC without the Word application.
    Setting the document-wide "update fields on open" flag is currently
    disabled here (the call is commented out).

    Raises:
        ValueError: If no document is loaded
    """
    if not self.document:
        raise ValueError("No document loaded. Call load_template() first.")

    # Set document to update fields when opened
    # self._set_update_fields_on_open()

    for paragraph in self.document.paragraphs:
        looks_like_toc = any(
            'TOC' in run.text or 'Table of Contents' in run.text
            for run in paragraph.runs
        )
        if looks_like_toc:
            self._mark_toc_for_update(paragraph)
            return
-
def _set_update_fields_on_open(self) -> None:
    """
    Flag the document so that fields (including the TOC) are updated when it
    is opened in Word.

    Ensures the document settings contain a ``w:updateFields`` element whose
    ``w:val`` is 'true', creating the element if it is missing. Any failure
    is reported with a printed warning instead of being raised.
    """
    # WordprocessingML namespace in Clark notation
    w_ns = '{http://schemas.openxmlformats.org/wordprocessingml/2006/main}'

    try:
        settings_element = self.document.settings.element

        existing = settings_element.find(f'{w_ns}updateFields')
        flag = OxmlElement('w:updateFields') if existing is None else existing
        flag.set(qn('w:val'), 'true')
        if existing is None:
            # Newly created element still has to be attached to the settings
            settings_element.append(flag)

    except Exception as e:
        # Log but don't fail - TOC update is not critical
        print(f"Warning: Could not set updateFields on open: {e}")
-
- def _mark_toc_for_update(self, paragraph) -> None:
- """
- Mark a TOC paragraph for update when document is opened.
-
- Args:
- paragraph: The TOC paragraph
- """
- # Add updateFields setting to document
- # This will prompt Word to update fields when opened
- try:
- # The updateFields setting is already set in _set_update_fields_on_open
- # This method can be used for additional TOC-specific handling if needed
- pass
- except Exception:
- pass # Settings may not be accessible
-
def add_update_history(self, version: str = '1.0', modifier: str = '', details: str = '') -> None:
    """
    Prepare an entry for the Update History section.

    Currently this only validates that a document is loaded, looks for an
    'Update History' / 'Revision History' paragraph and assembles the entry
    data; nothing is written to the document yet.

    Args:
        version: Document version
        modifier: Name of the person who modified
        details: Details of the changes ('Initial version' when empty)

    Raises:
        ValueError: If no document is loaded
    """
    if not self.document:
        raise ValueError("No document loaded. Call load_template() first.")

    # Locate the history section; the position is not used yet
    for paragraph in self.document.paragraphs:
        text = paragraph.text
        if 'Update History' in text or 'Revision History' in text:
            break

    # Entry data, dated today; not yet written into any table
    today = datetime.now().strftime('%Y-%m-%d')
    history_entry = dict(
        version=version,
        date=today,
        modifier=modifier,
        details=details or 'Initial version',
    )
-
def save(self, output_path: str) -> str:
    """
    Save the document to a file.

    Any missing parent directories of ``output_path`` are created first.

    Args:
        output_path: Path where to save the document

    Returns:
        The path where the document was saved

    Raises:
        ValueError: If no document has been loaded
    """
    if not self.document:
        raise ValueError("No document loaded. Call load_template() first.")

    # A bare file name such as "report.docx" has an empty dirname, and
    # os.makedirs('') raises FileNotFoundError, so only create the parent
    # directory when there actually is one.
    parent_dir = os.path.dirname(output_path)
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)

    self.document.save(output_path)
    return output_path
-
def get_file_size(self, file_path: str) -> int:
    """
    Report how many bytes a file occupies on disk.

    Args:
        file_path: Path to the file

    Returns:
        File size in bytes
    """
    file_info = os.stat(file_path)
    return file_info.st_size
-
def generate_report(self, scan_results: Dict[str, List[Dict[str, Any]]],
                    project_metadata: Dict[str, Any],
                    output_path: str,
                    network_diagram_path: str = None,
                    template_path: str = None,
                    regions: List[str] = None) -> Dict[str, Any]:
    """
    Build and save a full report from scan results (main entry point).

    Steps, in order: load the template, substitute metadata placeholders,
    drop empty services, add the service tables, resolve the architecture
    picture placeholder, refresh the table of contents, record the update
    history, save the document and collect file information.

    Args:
        scan_results: Dictionary mapping service keys to lists of resources
        project_metadata: Project metadata for placeholder replacement
        output_path: Path where to save the generated report
        network_diagram_path: Optional path to network diagram image
        template_path: Optional path to template file
        regions: Optional list of regions being scanned (for multi-region heading display)

    Returns:
        Dictionary with report metadata:
        - file_path: Path to the generated report
        - file_name: Name of the report file
        - file_size: Size of the report in bytes
        - services_included: List of services included in the report
        - accounts_count: Number of distinct account IDs found in the resources
    """
    # Template and placeholder substitution
    self.load_template(template_path)
    self.replace_placeholders(
        self.create_project_metadata_replacements(project_metadata)
    )

    # Only services that actually have resources are reported
    services = self.filter_empty_services(scan_results)

    # Collect distinct account IDs; resources may be plain dicts or objects
    # exposing an ``account_id`` attribute (e.g. ResourceData).
    seen_accounts = set()
    for items in services.values():
        for item in items:
            if isinstance(item, dict):
                if 'account_id' in item:
                    seen_accounts.add(item['account_id'])
                continue
            if hasattr(item, 'account_id'):
                seen_accounts.add(item.account_id)

    # The AWS Account column is only needed when more than one account shows up
    multi_account = len(seen_accounts) > 1
    self.add_service_tables(services, multi_account, regions)

    # Architecture picture: insert the image when it exists, otherwise
    # clear the placeholder.
    diagram_available = bool(network_diagram_path) and os.path.exists(network_diagram_path)
    if not diagram_available:
        self.clear_architecture_picture_placeholder()
    else:
        self.replace_architecture_picture_placeholder(network_diagram_path)

    self.update_table_of_contents()

    self.add_update_history(
        version='1.0',
        modifier=project_metadata.get('cloud_engineer', ''),
        details='Initial AWS resource inventory report'
    )

    self.save(output_path)

    # Describe the file that was just written
    size_in_bytes = self.get_file_size(output_path)
    report_info = {
        'file_path': output_path,
        'file_name': os.path.basename(output_path),
        'file_size': size_in_bytes,
        'services_included': list(services.keys()),
        'accounts_count': len(seen_accounts),
    }
    return report_info
def generate_report_filename(project_metadata: Dict[str, Any]) -> str:
    """
    Generate a report filename from project metadata.

    The name has the form ``<client>-<project>-Report-<YYYYmmdd_HHMMSS>.docx``.
    Client and project names are sanitized: characters other than word
    characters, whitespace and hyphens are removed, and every run of
    whitespace (spaces, tabs, newlines) becomes a single hyphen. A name that
    is missing, ``None`` or empty after sanitizing falls back to ``Client`` /
    ``Project``.

    Args:
        project_metadata: Project metadata dictionary; the ``client_name``
            and ``project_name`` keys are read

    Returns:
        Generated filename
    """
    def _filename_part(value: Any, fallback: str) -> str:
        # Coerce first so None / non-string metadata cannot crash re.sub.
        text = '' if value is None else str(value)
        text = re.sub(r'[^\w\s-]', '', text).strip()
        # Collapse whitespace of any kind; replacing only ' ' would leave
        # tabs and newlines inside the filename.
        text = re.sub(r'\s+', '-', text)
        return text or fallback

    client_name = _filename_part(project_metadata.get('client_name'), 'Client')
    project_name = _filename_part(project_metadata.get('project_name'), 'Project')
    timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')

    return f"{client_name}-{project_name}-Report-{timestamp}.docx"
|