| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702
70370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145114611471148114911501151115211531154115511561157115811591160116111621163116411651166116711681169117011711172117311741175117611771178117911801181118211831184118511861187118811891190119111921193119411951196119711981199120012011202120312041205120612071208120912101211121212131214121512161217121812191220122112221223122412251226122712281229123012311232123312341235123612371238123912401241124212431244124512461247124812491250125112521253125412551256125712581259126012611262126312641265126612671268126912701271127212731274127512761
27712781279128012811282128312841285128612871288128912901291129212931294129512961297129812991300130113021303130413051306130713081309131013111312131313141315131613171318131913201321132213231324132513261327132813291330133113321333133413351336133713381339134013411342134313441345134613471348134913501351135213531354135513561357135813591360136113621363136413651366136713681369137013711372137313741375137613771378137913801381138213831384138513861387138813891390139113921393139413951396139713981399140014011402140314041405140614071408140914101411141214131414141514161417141814191420142114221423142414251426142714281429143014311432143314341435143614371438143914401441144214431444144514461447144814491450145114521453145414551456145714581459146014611462146314641465146614671468146914701471147214731474147514761477147814791480148114821483148414851486148714881489149014911492149314941495149614971498149915001501150215031504150515061507150815091510151115121513151415151516151715181519152015211522152315241525152615271528152915301531153215331534153515361537153815391540154115421543154415451546154715481549155015511552155315541555155615571558155915601561156215631564156515661567156815691570157115721573157415751576157715781579158015811582158315841585158615871588158915901591159215931594159515961597159815991600160116021603160416051606160716081609161016111612161316141615161616171618161916201621162216231624162516261627162816291630163116321633163416351636163716381639164016411642164316441645164616471648164916501651165216531654165516561657165816591660166116621663166416651666166716681669167016711672167316741675167616771678167916801681168216831684168516861687168816891690169116921693169416951696169716981699170017011702170317041705170617071708170917101711171217131714171517161717171817191720172117221723172417251726172717281729173017311732173317341735173617371738173917401741174217431744174517461747174817491750175117521753175417551756175717581759176017611762176317641765176617671768176917701771177217731774177517761
77717781779178017811782178317841785178617871788178917901791179217931794179517961797179817991800180118021803180418051806180718081809181018111812181318141815181618171818181918201821182218231824182518261827182818291830183118321833183418351836183718381839184018411842184318441845184618471848184918501851185218531854185518561857185818591860186118621863186418651866186718681869187018711872187318741875187618771878187918801881188218831884188518861887188818891890189118921893189418951896189718981899190019011902190319041905190619071908190919101911191219131914191519161917191819191920192119221923192419251926192719281929193019311932193319341935193619371938193919401941194219431944194519461947194819491950195119521953195419551956195719581959196019611962 |
- """
- Report Generator Service
- This module handles Word document generation from AWS scan results.
- It loads templates, replaces placeholders, generates tables, and produces
- the final report document.
- """
- import os
- import re
- import copy
- from datetime import datetime
- from typing import Dict, List, Any, Optional, Tuple
- from docx import Document
- from docx.shared import Inches, Pt, Cm
- from docx.enum.text import WD_ALIGN_PARAGRAPH
- from docx.enum.table import WD_TABLE_ALIGNMENT
- from docx.oxml.ns import qn
- from docx.oxml import OxmlElement
class TableLayout:
    """String constants naming the two table orientations used by SERVICE_CONFIG."""

    # Header row across the top, followed by multiple data rows.
    HORIZONTAL = 'horizontal'

    # Attribute names down the left column, their values in the right column.
    VERTICAL = 'vertical'
# Per-service table definition, matching the design document.
# Each entry holds:
#   'layout'  - TableLayout.HORIZONTAL or TableLayout.VERTICAL
#   'title'   - text written into the merged title row of the table
#   'columns' - column headers (horizontal) or attribute labels (vertical), in order
SERVICE_CONFIG = {
    # ----- VPC related resources -----
    'vpc': {'layout': TableLayout.HORIZONTAL, 'title': 'VPC',
            'columns': ['Region', 'Name', 'ID', 'CIDR']},
    'subnet': {'layout': TableLayout.HORIZONTAL, 'title': 'Subnet',
               'columns': ['Name', 'ID', 'AZ', 'CIDR']},
    'route_table': {'layout': TableLayout.HORIZONTAL, 'title': 'Route Table',
                    'columns': ['Name', 'ID', 'Subnet Associations']},
    'internet_gateway': {'layout': TableLayout.HORIZONTAL, 'title': 'Internet Gateway',
                         'columns': ['Name', 'ID']},
    'nat_gateway': {'layout': TableLayout.HORIZONTAL, 'title': 'NAT Gateway',
                    'columns': ['Name', 'ID', 'Public IP', 'Private IP']},
    'security_group': {'layout': TableLayout.HORIZONTAL, 'title': 'Security Group',
                       'columns': ['Name', 'ID', 'Protocol', 'Port range', 'Source']},
    'vpc_endpoint': {'layout': TableLayout.HORIZONTAL, 'title': 'Endpoint',
                     'columns': ['Name', 'ID', 'VPC', 'Service Name', 'Type']},
    'vpc_peering': {'layout': TableLayout.HORIZONTAL, 'title': 'VPC Peering',
                    'columns': ['Name', 'Peering Connection ID', 'Requester VPC',
                                'Accepter VPC']},
    'customer_gateway': {'layout': TableLayout.HORIZONTAL, 'title': 'Customer Gateway',
                         'columns': ['Name', 'Customer Gateway ID', 'IP Address']},
    'virtual_private_gateway': {'layout': TableLayout.HORIZONTAL,
                                'title': 'Virtual Private Gateway',
                                'columns': ['Name', 'Virtual Private Gateway ID', 'VPC']},
    'vpn_connection': {'layout': TableLayout.HORIZONTAL, 'title': 'VPN Connection',
                       'columns': ['Name', 'VPN ID', 'Routes']},

    # ----- EC2 related resources -----
    'ec2': {'layout': TableLayout.VERTICAL, 'title': 'Instance',
            'columns': ['Name', 'Instance ID', 'Instance Type', 'AZ', 'AMI',
                        'Public IP', 'Public DNS', 'Private IP', 'VPC ID', 'Subnet ID',
                        'Key', 'Security Groups', 'EBS Type', 'EBS Size', 'Encryption',
                        'Other Requirement']},
    'elastic_ip': {'layout': TableLayout.HORIZONTAL, 'title': 'Elastic IP',
                   'columns': ['Name', 'Elastic IP']},

    # ----- Auto Scaling -----
    'autoscaling': {'layout': TableLayout.VERTICAL, 'title': 'Auto Scaling Group',
                    'columns': ['Name', 'Launch Template', 'AMI', 'Instance type',
                                'Key', 'Target Groups', 'Desired', 'Min', 'Max',
                                'Scaling Policy']},

    # ----- ELB related resources -----
    'elb': {'layout': TableLayout.VERTICAL, 'title': 'Load Balancer',
            'columns': ['Name', 'Type', 'DNS', 'Scheme', 'VPC',
                        'Availability Zones', 'Subnet', 'Security Groups']},
    'target_group': {'layout': TableLayout.VERTICAL, 'title': 'Target Group',
                     'columns': ['Load Balancer', 'TG Name', 'Port', 'Protocol',
                                 'Registered Instances', 'Health Check Path']},

    # ----- RDS -----
    'rds': {'layout': TableLayout.VERTICAL, 'title': 'DB Instance',
            'columns': ['Region', 'Endpoint', 'DB instance ID', 'DB name',
                        'Master Username', 'Port', 'DB Engine', 'DB Version',
                        'Instance Type', 'Storage type', 'Storage', 'Multi-AZ',
                        'Security Group', 'Deletion Protection',
                        'Performance Insights Enabled', 'CloudWatch Logs']},

    # ----- ElastiCache -----
    'elasticache': {'layout': TableLayout.VERTICAL, 'title': 'Cache Cluster',
                    'columns': ['Cluster ID', 'Engine', 'Engine Version', 'Node Type',
                                'Num Nodes', 'Status']},

    # ----- EKS -----
    'eks': {'layout': TableLayout.VERTICAL, 'title': 'Cluster',
            'columns': ['Cluster Name', 'Version', 'Status', 'Endpoint', 'VPC ID']},

    # ----- Lambda -----
    'lambda': {'layout': TableLayout.HORIZONTAL, 'title': 'Function',
               'columns': ['Function Name', 'Runtime', 'Memory (MB)', 'Timeout (s)',
                           'Last Modified']},

    # ----- S3 -----
    's3': {'layout': TableLayout.HORIZONTAL, 'title': 'Bucket',
           'columns': ['Region', 'Bucket Name']},
    's3_event_notification': {'layout': TableLayout.VERTICAL,
                              'title': 'S3 event notification',
                              'columns': ['Bucket', 'Name', 'Event Type',
                                          'Destination type', 'Destination']},

    # ----- CloudFront (Global) -----
    'cloudfront': {'layout': TableLayout.VERTICAL, 'title': 'Distribution',
                   'columns': ['CloudFront ID', 'Domain Name', 'CNAME',
                               'Origin Domain Name', 'Origin Protocol Policy',
                               'Viewer Protocol Policy', 'Allowed HTTP Methods',
                               'Cached HTTP Methods']},

    # ----- Route 53 (Global) -----
    'route53': {'layout': TableLayout.HORIZONTAL, 'title': 'Hosted Zone',
                'columns': ['Zone ID', 'Name', 'Type', 'Record Count']},

    # ----- ACM (Global) -----
    'acm': {'layout': TableLayout.HORIZONTAL, 'title': 'ACM',
            'columns': ['Domain name', 'Additional names']},

    # ----- WAF (Global) -----
    'waf': {'layout': TableLayout.HORIZONTAL, 'title': 'Web ACL',
            'columns': ['WebACL Name', 'Scope', 'Rules Count', 'Associated Resources']},

    # ----- SNS -----
    'sns': {'layout': TableLayout.HORIZONTAL, 'title': 'Topic',
            'columns': ['Topic Name', 'Topic Display Name', 'Subscription Protocol',
                        'Subscription Endpoint']},

    # ----- CloudWatch -----
    'cloudwatch': {'layout': TableLayout.HORIZONTAL, 'title': 'Log Group',
                   'columns': ['Log Group Name', 'Retention Days', 'Stored Bytes',
                               'KMS Encryption']},

    # ----- EventBridge -----
    'eventbridge': {'layout': TableLayout.HORIZONTAL, 'title': 'Rule',
                    'columns': ['Name', 'Description', 'Event Bus', 'State']},

    # ----- CloudTrail -----
    'cloudtrail': {'layout': TableLayout.HORIZONTAL, 'title': 'Trail',
                   'columns': ['Name', 'Multi-Region Trail', 'Log File Validation',
                               'KMS Encryption']},

    # ----- Config -----
    'config': {'layout': TableLayout.HORIZONTAL, 'title': 'Config',
               'columns': ['Name', 'Regional Resources', 'Global Resources',
                           'Retention period']},
}
# Order in which services are displayed in the report (keys of SERVICE_CONFIG).
SERVICE_ORDER = [
    # VPC family
    'vpc', 'subnet', 'route_table', 'internet_gateway', 'nat_gateway',
    'security_group', 'vpc_endpoint', 'vpc_peering',
    'customer_gateway', 'virtual_private_gateway', 'vpn_connection',
    # Compute
    'ec2', 'elastic_ip', 'autoscaling',
    # Load balancing
    'elb', 'target_group',
    # Data stores and clusters
    'rds', 'elasticache', 'eks',
    # Serverless and storage
    'lambda', 's3', 's3_event_notification',
    # Edge, DNS, certificates, firewall
    'cloudfront', 'route53', 'acm', 'waf',
    # Messaging, monitoring, audit
    'sns', 'cloudwatch', 'eventbridge', 'cloudtrail', 'config',
]
# Services treated as global: they are not region-specific and should not be
# duplicated per region.
GLOBAL_SERVICES = [
    'cloudfront',
    'route53',
    'waf',
    's3',
    's3_event_notification',
    'cloudtrail',
]
# Maps each service key to the parent group whose name is used as the
# Heading 2 title. Written as (group, member services) pairs and flattened, so
# every member of a group sits next to its siblings; the resulting dict has
# one entry per service key, in the order the members are listed here.
SERVICE_GROUPS = {
    service: group
    for group, services in (
        # All VPC related resources share the "VPC" heading.
        ('VPC', ('vpc', 'subnet', 'route_table', 'internet_gateway', 'nat_gateway',
                 'security_group', 'vpc_endpoint', 'vpc_peering',
                 'customer_gateway', 'virtual_private_gateway', 'vpn_connection')),
        ('EC2', ('ec2', 'elastic_ip')),
        ('AutoScaling', ('autoscaling',)),
        # Load Balancer and Target Group share the "ELB" heading.
        ('ELB', ('elb', 'target_group')),
        # Database services use the service name as the heading.
        ('RDS', ('rds',)),
        ('Elasticache', ('elasticache',)),
        ('EKS', ('eks',)),
        ('Lambda', ('lambda',)),
        # Bucket and event notification share the "S3" heading.
        ('S3', ('s3', 's3_event_notification')),
        # Global services.
        ('CloudFront', ('cloudfront',)),
        ('Route53', ('route53',)),
        ('ACM', ('acm',)),
        ('WAF', ('waf',)),
        # Monitoring services.
        ('SNS', ('sns',)),
        ('CloudWatch', ('cloudwatch',)),
        ('EventBridge', ('eventbridge',)),
        ('CloudTrail', ('cloudtrail',)),
        ('Config', ('config',)),
    )
    for service in services
}
# Heading order for the report: service group names in the order their
# headings should appear.
SERVICE_GROUP_ORDER = [
    'VPC',
    'EC2',
    'AutoScaling',
    'ELB',
    'RDS',
    'Elasticache',
    'EKS',
    'Lambda',
    'S3',
    'CloudFront',
    'Route53',
    'ACM',
    'WAF',
    'SNS',
    'CloudWatch',
    'EventBridge',
    'CloudTrail',
    'Config',
]
- class ReportGenerator:
- """
- Generates Word reports from AWS scan results using templates.
-
- This class handles:
- - Loading Word templates from sample-reports folder
- - Parsing and replacing placeholders
- - Generating horizontal and vertical tables for different services
- - Embedding network diagrams
- - Updating table of contents
- """
-
- def __init__(self, template_path: str = None):
- """
- Initialize the report generator.
-
- Args:
- template_path: Path to the Word template file. If None, uses default template.
- """
- self.template_path = template_path
- self.document = None
- self._placeholder_pattern = re.compile(r'\[([^\]]+)\]')
-
def load_template(self, template_path: Optional[str] = None) -> Document:
    """
    Load a Word template file and keep it as the working document.

    The path is taken from ``template_path``, then from the path given to the
    constructor, and finally from _get_default_template_path().

    Args:
        template_path: Path to the template file

    Returns:
        Loaded Document object (also stored on ``self.document``)

    Raises:
        FileNotFoundError: If template file doesn't exist
        ValueError: If the file exists but cannot be opened as a document;
            the underlying error is attached as ``__cause__``
    """
    path = template_path or self.template_path
    if not path:
        # Neither an argument nor a constructor path: use the default template
        path = self._get_default_template_path()

    if not os.path.exists(path):
        raise FileNotFoundError(f"Template file not found: {path}")

    try:
        self.document = Document(path)
    except Exception as e:
        # Chain explicitly so the original parsing error stays in the traceback
        raise ValueError(f"Failed to load template: {e}") from e
    return self.document
-
def _get_default_template_path(self) -> str:
    """
    Get the default template path from a sample-reports folder.

    Each candidate base directory is checked in order. Within a directory the
    named placeholder template wins; otherwise the alphabetically first
    ``.docx`` file in that directory's sample-reports folder is used.

    Returns:
        Path to the template file

    Raises:
        FileNotFoundError: If no candidate directory contains a usable .docx
    """
    # Try multiple possible locations relative to this module
    possible_base_dirs = [
        os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(__file__)))),  # Development
        os.path.dirname(os.path.dirname(os.path.dirname(__file__))),  # Docker /app
        '/app',  # Docker absolute
    ]

    template_name = '[Client Name]-[Project Name]-Project-Report-v1.0.docx'

    for base_dir in possible_base_dirs:
        sample_reports_dir = os.path.join(base_dir, 'sample-reports')
        template_path = os.path.join(sample_reports_dir, template_name)

        if os.path.exists(template_path):
            return template_path

        # Fall back to any .docx file in sample-reports. os.listdir() returns
        # names in arbitrary order, so sort to make the choice reproducible,
        # and skip "~$" owner/lock files that Word leaves next to open
        # documents (they end in .docx but are not valid documents).
        if os.path.isdir(sample_reports_dir):
            candidates = sorted(
                name for name in os.listdir(sample_reports_dir)
                if name.endswith('.docx') and not name.startswith('~$')
            )
            if candidates:
                return os.path.join(sample_reports_dir, candidates[0])

    raise FileNotFoundError("No template file found in sample-reports folder")
-
def find_placeholders(self) -> List[str]:
    """
    Find all placeholders in the document.

    Scans body paragraphs, paragraphs inside body table cells, and the
    paragraphs of every section's header and footer variants.

    Returns:
        Sorted list of unique placeholder names (without brackets)

    Raises:
        ValueError: If no document has been loaded
    """
    if not self.document:
        raise ValueError("No document loaded. Call load_template() first.")

    document = self.document

    def iter_paragraphs():
        """Yield every paragraph this scan covers, in document order."""
        # Body paragraphs
        yield from document.paragraphs

        # Paragraphs inside body tables
        for table in document.tables:
            for row in table.rows:
                for cell in row.cells:
                    yield from cell.paragraphs

        # Header and footer variants of each section
        for section in document.sections:
            for header in (section.header, section.first_page_header,
                           section.even_page_header):
                if header:
                    yield from header.paragraphs

            for footer in (section.footer, section.first_page_footer,
                           section.even_page_footer):
                if footer:
                    yield from footer.paragraphs

    placeholders = set()
    for paragraph in iter_paragraphs():
        placeholders.update(self._placeholder_pattern.findall(paragraph.text))

    # Sort so callers get a stable order instead of set iteration order
    return sorted(placeholders)
-
def get_template_structure(self) -> Dict[str, Any]:
    """
    Summarize the loaded template.

    Returns:
        Dictionary with the counts of sections, body paragraphs and body
        tables, the placeholders reported by find_placeholders(), and a
        'headings' list of {'level': <style name>, 'text': <paragraph text>}
        for every body paragraph whose style name starts with 'Heading'

    Raises:
        ValueError: If no document has been loaded
    """
    doc = self.document
    if not doc:
        raise ValueError("No document loaded. Call load_template() first.")

    summary = {
        'sections': len(doc.sections),
        'paragraphs': len(doc.paragraphs),
        'tables': len(doc.tables),
        'placeholders': self.find_placeholders(),
        'headings': [],
    }

    # Collect headings after the counts so the dict keeps its key order
    summary['headings'].extend(
        {'level': para.style.name, 'text': para.text}
        for para in doc.paragraphs
        if para.style and para.style.name.startswith('Heading')
    )

    return summary
-
def replace_placeholders(self, replacements: Dict[str, str]) -> None:
    """
    Apply placeholder replacements to every paragraph the generator scans.

    Visits, in order: body paragraphs, paragraphs inside body table cells,
    then for each section its header variants followed by its footer variants.
    The substitution itself is delegated to _replace_in_paragraph().

    Args:
        replacements: Dictionary mapping placeholder names to values
                     e.g., {'Client Name': 'Acme Corp', 'Project Name': 'Cloud Migration'}

    Raises:
        ValueError: If no document has been loaded
    """
    if not self.document:
        raise ValueError("No document loaded. Call load_template() first.")

    substitute = self._replace_in_paragraph

    # Document body
    for body_paragraph in self.document.paragraphs:
        substitute(body_paragraph, replacements)

    # Body tables, cell by cell
    for table in self.document.tables:
        for row in table.rows:
            for cell in row.cells:
                for cell_paragraph in cell.paragraphs:
                    substitute(cell_paragraph, replacements)

    # Headers first, then footers, for each section
    for section in self.document.sections:
        for header in (section.header, section.first_page_header,
                       section.even_page_header):
            if not header:
                continue
            for header_paragraph in header.paragraphs:
                substitute(header_paragraph, replacements)

        for footer in (section.footer, section.first_page_footer,
                       section.even_page_footer):
            if not footer:
                continue
            for footer_paragraph in footer.paragraphs:
                substitute(footer_paragraph, replacements)
-
- def _replace_in_paragraph(self, paragraph, replacements: Dict[str, str]) -> None:
- """
- Replace placeholders in a single paragraph while preserving formatting.
-
- Supports both bracketed placeholders like [Client Name] and
- unbracketed placeholders like YYYY. mm. DD.
-
- Args:
- paragraph: The paragraph to process
- replacements: Dictionary of placeholder replacements
- """
- if not paragraph.text:
- return
-
- # Check if paragraph contains any placeholders (bracketed or unbracketed)
- text = paragraph.text
- has_placeholder = False
- for placeholder in replacements.keys():
- # Check for bracketed placeholder [placeholder]
- if f'[{placeholder}]' in text:
- has_placeholder = True
- break
- # Check for unbracketed placeholder (for date formats like YYYY. mm. DD)
- if placeholder in text:
- has_placeholder = True
- break
-
- if not has_placeholder:
- return
-
- # Replace placeholders in the text
- new_text = text
- for placeholder, value in replacements.items():
- # First try bracketed replacement
- new_text = new_text.replace(f'[{placeholder}]', str(value) if value else '')
- # Then try unbracketed replacement (for date formats like YYYY. mm. DD)
- # Only replace patterns that start with YYYY to avoid replacing column names like "Date"
- if placeholder.startswith('YYYY'):
- new_text = new_text.replace(placeholder, str(value) if value else '')
-
- # If text changed, update the paragraph
- if new_text != text:
- # Try to preserve formatting by updating runs
- if len(paragraph.runs) == 1:
- paragraph.runs[0].text = new_text
- else:
- # For complex formatting, rebuild the paragraph
- # Store the first run's formatting
- if paragraph.runs:
- first_run = paragraph.runs[0]
- font_name = first_run.font.name
- font_size = first_run.font.size
- bold = first_run.font.bold
- italic = first_run.font.italic
-
- # Clear all runs
- for run in paragraph.runs:
- run.text = ''
-
- # Set new text on first run
- paragraph.runs[0].text = new_text
- else:
- # No runs, add new one
- paragraph.add_run(new_text)
-
def create_project_metadata_replacements(self, metadata: Dict[str, Any]) -> Dict[str, str]:
    """
    Build the placeholder-to-value mapping from project metadata.

    Args:
        metadata: Project metadata dictionary. Each field may be given under
            a camelCase or a snake_case key (camelCase is tried first):
            - clientName/client_name, projectName/project_name
            - bdManager/bd_manager, bdManagerEmail/bd_manager_email
            - solutionsArchitect/solutions_architect, solutionsArchitectEmail/solutions_architect_email
            - cloudEngineer/cloud_engineer, cloudEngineerEmail/cloud_engineer_email

    Returns:
        Dictionary of placeholder replacements: client/project names, each
        role under '<Role>', '<Role> Name' and '<Role> Email', today's date
        in several formats, and fixed version strings
    """
    today = datetime.now()

    def pick(camel_key: str, snake_key: str) -> str:
        """Return the first non-empty value among the two spellings, else ''."""
        for key in (camel_key, snake_key):
            found = metadata.get(key, '')
            if found:
                return found
        return ''

    # Client and project
    replacements = {
        'Client Name': pick('clientName', 'client_name'),
        'Project Name': pick('projectName', 'project_name'),
    }

    # Each role is exposed under three placeholder names
    roles = (
        ('BD Manager', 'bdManager', 'bd_manager'),
        ('Solutions Architect', 'solutionsArchitect', 'solutions_architect'),
        ('Cloud Engineer', 'cloudEngineer', 'cloud_engineer'),
    )
    for role, camel_key, snake_key in roles:
        person = pick(camel_key, snake_key)
        replacements[role] = person
        replacements[f'{role} Name'] = person
        replacements[f'{role} Email'] = pick(f'{camel_key}Email', f'{snake_key}_email')

    # Date placeholders in the formats the templates use
    replacements.update({
        'Date': today.strftime('%Y-%m-%d'),
        'YYYY. mm. DD': today.strftime('%Y. %m. %d'),
        'YYYY.mm.DD': today.strftime('%Y.%m.%d'),
        'YYYY-mm-DD': today.strftime('%Y-%m-%d'),
        'Month': today.strftime('%B'),
        'Year': str(today.year),
        'Report Date': today.strftime('%B %d, %Y'),
    })

    # Version info
    replacements['Version'] = '1.0'
    replacements['Document Version'] = '1.0'

    return replacements
-
def add_horizontal_table(self, service_key: str, resources: List[Dict[str, Any]],
                         include_account_column: bool = False) -> None:
    """
    Append a horizontal table for a service to the end of the document.

    Layout:
    | Service title (merged across all columns) |
    | Column1 | Column2 | Column3 |
    | Value1  | Value2  | Value3  |

    An empty paragraph is appended after the table as spacing.

    Args:
        service_key: The service key from SERVICE_CONFIG
        resources: List of resource dictionaries, one data row each
        include_account_column: Whether to put an 'AWS Account' column first
            (for multi-account)

    Raises:
        ValueError: If no document is loaded, the service key is unknown, or
            the service is not configured with the horizontal layout
    """
    if not self.document:
        raise ValueError("No document loaded. Call load_template() first.")
    if service_key not in SERVICE_CONFIG:
        raise ValueError(f"Unknown service: {service_key}")

    service = SERVICE_CONFIG[service_key]
    if service['layout'] != TableLayout.HORIZONTAL:
        raise ValueError(f"Service {service_key} uses vertical layout, not horizontal")

    # Work on a copy so the shared configuration is never mutated
    headings = list(service['columns'])
    if include_account_column and 'AWS Account' not in headings:
        headings.insert(0, 'AWS Account')
    width = len(headings)

    # Title row + header row + one row per resource
    table = self.document.add_table(rows=len(resources) + 2, cols=width)
    self._copy_table_style_from_template(table)

    # Title row: merge every cell into the first, then write the centered title
    banner_row = table.rows[0]
    banner_cell = banner_row.cells[0]
    for col in range(1, width):
        banner_cell.merge(banner_row.cells[col])
    banner_cell.text = service['title']
    self._apply_header_cell_style(banner_cell, is_title=True)
    for paragraph in banner_cell.paragraphs:
        paragraph.alignment = WD_ALIGN_PARAGRAPH.CENTER

    # Header row
    header_row = table.rows[1]
    for col, heading in enumerate(headings):
        header_cell = header_row.cells[col]
        header_cell.text = heading
        self._apply_header_cell_style(header_cell)

    # Data rows start at index 2, after the title and header rows
    for row_number, resource in enumerate(resources, start=2):
        data_row = table.rows[row_number]
        for col, heading in enumerate(headings):
            data_cell = data_row.cells[col]
            data_cell.text = self._get_resource_value(resource, heading)

    # Spacing after the table
    self.document.add_paragraph()
-
def add_vertical_table(self, service_key: str, resource: Dict[str, Any],
                       include_account_column: bool = False,
                       show_title: bool = True) -> None:
    """
    Append a two-column vertical table for a single resource.

    Layout:
    | Service title (merged across 2 columns) |
    | Column1 | Value1 |
    | Column2 | Value2 |

    An empty paragraph is appended after the table as spacing.

    Args:
        service_key: The service key from SERVICE_CONFIG
        resource: Single resource dictionary
        include_account_column: Whether to put an 'AWS Account' row first
            (for multi-account)
        show_title: Whether to include the merged service title row

    Raises:
        ValueError: If no document is loaded, the service key is unknown, or
            the service is not configured with the vertical layout
    """
    if not self.document:
        raise ValueError("No document loaded. Call load_template() first.")
    if service_key not in SERVICE_CONFIG:
        raise ValueError(f"Unknown service: {service_key}")

    service = SERVICE_CONFIG[service_key]
    if service['layout'] != TableLayout.VERTICAL:
        raise ValueError(f"Service {service_key} uses horizontal layout, not vertical")

    # Work on a copy so the shared configuration is never mutated
    attributes = list(service['columns'])
    if include_account_column and 'AWS Account' not in attributes:
        attributes.insert(0, 'AWS Account')

    # The title row, when shown, pushes the attribute rows down by one
    first_attribute_row = 1 if show_title else 0
    table = self.document.add_table(rows=len(attributes) + first_attribute_row, cols=2)
    self._copy_table_style_from_template(table)

    if show_title:
        # Merge both cells of row 0 and write the centered service title
        banner_row = table.rows[0]
        banner_cell = banner_row.cells[0]
        banner_cell.merge(banner_row.cells[1])
        banner_cell.text = service['title']
        self._apply_header_cell_style(banner_cell, is_title=True)
        for paragraph in banner_cell.paragraphs:
            paragraph.alignment = WD_ALIGN_PARAGRAPH.CENTER

    # One row per attribute: styled label on the left, value on the right
    for row_number, attribute in enumerate(attributes, start=first_attribute_row):
        attribute_row = table.rows[row_number]

        label_cell = attribute_row.cells[0]
        label_cell.text = attribute
        self._apply_header_cell_style(label_cell)

        content_cell = attribute_row.cells[1]
        content_cell.text = self._get_resource_value(resource, attribute)

    # Spacing after the table
    self.document.add_paragraph()
-
def add_vertical_tables_for_service(self, service_key: str, resources: List[Dict[str, Any]],
                                    include_account_column: bool = False) -> None:
    """
    Append one vertical table per resource of a service.

    Every table is produced by add_vertical_table() with the title row
    enabled, so each resource's table starts with the service title.

    Args:
        service_key: The service key from SERVICE_CONFIG
        resources: List of resource dictionaries
        include_account_column: Whether to include AWS Account row

    Raises:
        ValueError: If no document is loaded or the service key is unknown
    """
    if not self.document:
        raise ValueError("No document loaded. Call load_template() first.")
    if service_key not in SERVICE_CONFIG:
        raise ValueError(f"Unknown service: {service_key}")

    for single_resource in resources:
        self.add_vertical_table(
            service_key,
            single_resource,
            include_account_column,
            show_title=True,
        )
-
- def _insert_element_at_position(self, element) -> None:
- """
- Insert an element at the tracked position within Implementation List section.
-
- Args:
- element: The XML element to insert
- """
- if self._insert_parent is not None and self._insert_index is not None:
- self._insert_parent.insert(self._insert_index, element)
- self._insert_index += 1
- else:
- # Fallback: append to document body
- self.document._body._body.append(element)
-
def _add_horizontal_table_at_position(self, service_key: str, resources: List[Dict[str, Any]],
                                      include_account_column: bool = False) -> None:
    """
    Build a horizontal table and place it at the tracked insertion position.

    The table is created through the document (which appends it to the
    body), detached, and re-inserted via _insert_element_at_position().
    It has a merged title row, a header row and one row per resource, and is
    followed by a spacing paragraph at the tracked position.

    Args:
        service_key: The service key from SERVICE_CONFIG
        resources: List of resource dictionaries
        include_account_column: Whether to include AWS Account column

    Raises:
        ValueError: If the service key is unknown
    """
    if service_key not in SERVICE_CONFIG:
        raise ValueError(f"Unknown service: {service_key}")

    service = SERVICE_CONFIG[service_key]
    # Work on a copy so the shared configuration is never mutated
    headings = list(service['columns'])
    if include_account_column and 'AWS Account' not in headings:
        headings.insert(0, 'AWS Account')
    width = len(headings)

    # Title row + header row + one row per resource
    table = self.document.add_table(rows=len(resources) + 2, cols=width)

    # add_table() put the table at the end of the body; relocate it
    table_xml = table._tbl
    table_xml.getparent().remove(table_xml)
    self._insert_element_at_position(table_xml)

    self._copy_table_style_from_template(table)

    # Title row: merge every cell into the first, then write the centered title
    banner_row = table.rows[0]
    banner_cell = banner_row.cells[0]
    for col in range(1, width):
        banner_cell.merge(banner_row.cells[col])
    banner_cell.text = service['title']
    self._apply_header_cell_style(banner_cell, is_title=True)
    for paragraph in banner_cell.paragraphs:
        paragraph.alignment = WD_ALIGN_PARAGRAPH.CENTER

    # Header row
    header_row = table.rows[1]
    for col, heading in enumerate(headings):
        header_cell = header_row.cells[col]
        header_cell.text = heading
        self._apply_header_cell_style(header_cell)

    # Data rows start at index 2, after the title and header rows
    for row_number, resource in enumerate(resources, start=2):
        data_row = table.rows[row_number]
        for col, heading in enumerate(headings):
            data_cell = data_row.cells[col]
            data_cell.text = self._get_resource_value(resource, heading)
            self._apply_data_cell_style(data_cell)

    # Spacing paragraph after the table
    self._add_spacing_paragraph_at_position()
-
def _add_vertical_tables_at_position(self, service_key: str, resources: List[Dict[str, Any]],
                                     include_account_column: bool = False) -> None:
    """
    Add vertical tables at the tracked position within Implementation List section.

    One two-column table is produced per resource: row 0 is the merged
    service title, each following row is an attribute name / value pair.
    An empty spacing paragraph is inserted after every table.

    Args:
        service_key: The service key from SERVICE_CONFIG
        resources: List of resource dictionaries
        include_account_column: Whether to include AWS Account row

    Raises:
        ValueError: If the service key is not present in SERVICE_CONFIG
    """
    if service_key not in SERVICE_CONFIG:
        raise ValueError(f"Unknown service: {service_key}")

    config = SERVICE_CONFIG[service_key]
    columns = list(config['columns'])
    if include_account_column and 'AWS Account' not in columns:
        columns.insert(0, 'AWS Account')

    for resource in resources:
        # Create table: 1 title row + attribute rows
        table = self.document.add_table(rows=len(columns) + 1, cols=2)

        # add_table() appends to the document; detach and re-insert at the cursor
        tbl_element = table._tbl
        tbl_element.getparent().remove(tbl_element)
        self._insert_element_at_position(tbl_element)

        # Apply table styling
        self._copy_table_style_from_template(table)

        # Snapshot the row proxies once instead of re-indexing table.rows
        # (each access rebuilds the proxy list).
        rows = list(table.rows)

        # Row 0: Service title (merged across 2 columns)
        title_cells = rows[0].cells
        title_cell = title_cells[0]
        title_cell.merge(title_cells[1])
        title_cell.text = config['title']
        self._apply_header_cell_style(title_cell, is_title=True)
        for paragraph in title_cell.paragraphs:
            paragraph.alignment = WD_ALIGN_PARAGRAPH.CENTER

        # Attribute rows: name on the left, value on the right
        for row, col_name in zip(rows[1:], columns):
            cells = row.cells

            name_cell = cells[0]
            name_cell.text = col_name
            self._apply_header_cell_style(name_cell)

            value_cell = cells[1]
            value_cell.text = self._get_resource_value(resource, col_name)
            self._apply_data_cell_style(value_cell)

        # Add spacing paragraph after table
        self._add_spacing_paragraph_at_position()
-
def _add_spacing_paragraph_at_position(self) -> None:
    """Insert an empty paragraph at the tracked position to act as spacing."""
    # add_paragraph() appends to the document, so detach the new element
    # and re-insert it where the cursor currently is.
    spacer = self.document.add_paragraph()._element
    spacer.getparent().remove(spacer)
    self._insert_element_at_position(spacer)
-
- def _get_resource_value(self, resource: Dict[str, Any], column_name: str) -> str:
- """
- Get value from resource for a given column name.
-
- Handles both flat dictionaries and ResourceData.to_dict() format
- where attributes are nested in 'attributes' key.
- Empty values are replaced with '-'.
-
- Args:
- resource: Resource dictionary
- column_name: Column display name
-
- Returns:
- Value as string, or '-' if empty
- """
- value = None
-
- # First try to get from attributes (ResourceData format)
- attributes = resource.get('attributes', {})
- if column_name in attributes:
- value = attributes[column_name]
-
- # Try mapped attribute key in attributes
- if value is None:
- attr_key = self._column_to_attribute(column_name)
- if attr_key in attributes:
- value = attributes[attr_key]
-
- # Fallback: try direct access on resource (flat dict format)
- if value is None and column_name in resource:
- value = resource[column_name]
-
- if value is None:
- attr_key = self._column_to_attribute(column_name)
- if attr_key in resource:
- value = resource[attr_key]
-
- # Convert to string and handle empty values
- if value is None or value == '' or (isinstance(value, str) and value.strip() == ''):
- return '-'
-
- return str(value)
-
- def _column_to_attribute(self, column_name: str) -> str:
- """
- Convert column display name to attribute key.
-
- Args:
- column_name: Display name of the column
-
- Returns:
- Attribute key for the resource dictionary
- """
- # Common mappings
- mappings = {
- 'Name': 'name',
- 'ID': 'id',
- 'Region': 'region',
- 'AZ': 'availability_zone',
- 'CIDR': 'cidr_block',
- 'VPC': 'vpc_id',
- 'VPC ID': 'vpc_id',
- 'Subnet ID': 'subnet_id',
- 'Instance ID': 'instance_id',
- 'Instance Type': 'instance_type',
- 'AMI': 'ami_id',
- 'Public IP': 'public_ip',
- 'Public DNS': 'public_dns',
- 'Private IP': 'private_ip',
- 'Elastic IP': 'elastic_ip',
- 'Key': 'key_name',
- 'Security Groups': 'security_groups',
- 'EBS Type': 'ebs_type',
- 'EBS Size': 'ebs_size',
- 'Encryption': 'encryption',
- 'AWS Account': 'account_id',
- 'Subnet Associations': 'subnet_associations',
- 'Peering Connection ID': 'peering_connection_id',
- 'Requester VPC': 'requester_vpc',
- 'Accepter VPC': 'accepter_vpc',
- 'Customer Gateway ID': 'customer_gateway_id',
- 'IP Address': 'ip_address',
- 'Virtual Private Gateway ID': 'virtual_private_gateway_id',
- 'VPN ID': 'vpn_id',
- 'Routes': 'routes',
- 'Service Name': 'service_name',
- 'Type': 'type',
- 'Launch Template': 'launch_template',
- 'Target Groups': 'target_groups',
- 'Desired': 'desired_capacity',
- 'Min': 'min_size',
- 'Max': 'max_size',
- 'Scaling Policy': 'scaling_policy',
- 'DNS': 'dns_name',
- 'Scheme': 'scheme',
- 'Availability Zones': 'availability_zones',
- 'Load Balancer': 'load_balancer',
- 'TG Name': 'target_group_name',
- 'Port': 'port',
- 'Protocol': 'protocol',
- 'Registered Instances': 'registered_instances',
- 'Health Check Path': 'health_check_path',
- 'Endpoint': 'endpoint',
- 'DB instance ID': 'db_instance_id',
- 'DB name': 'db_name',
- 'Master Username': 'master_username',
- 'DB Engine': 'engine',
- 'DB Version': 'engine_version',
- 'Storage type': 'storage_type',
- 'Storage': 'storage',
- 'Multi-AZ': 'multi_az',
- 'Deletion Protection': 'deletion_protection',
- 'Performance Insights Enabled': 'performance_insights',
- 'CloudWatch Logs': 'cloudwatch_logs',
- 'Cluster ID': 'cluster_id',
- 'Engine': 'engine',
- 'Engine Version': 'engine_version',
- 'Node Type': 'node_type',
- 'Num Nodes': 'num_nodes',
- 'Status': 'status',
- 'Cluster Name': 'cluster_name',
- 'Version': 'version',
- 'Function Name': 'function_name',
- 'Runtime': 'runtime',
- 'Memory (MB)': 'memory_size',
- 'Timeout (s)': 'timeout',
- 'Last Modified': 'last_modified',
- 'Bucket Name': 'bucket_name',
- 'Bucket': 'bucket',
- 'Event Type': 'event_type',
- 'Destination type': 'destination_type',
- 'Destination': 'destination',
- 'CloudFront ID': 'cloudfront_id',
- 'Domain Name': 'domain_name',
- 'CNAME': 'cname',
- 'Origin Domain Name': 'origin_domain_name',
- 'Origin Protocol Policy': 'origin_protocol_policy',
- 'Viewer Protocol Policy': 'viewer_protocol_policy',
- 'Allowed HTTP Methods': 'allowed_http_methods',
- 'Cached HTTP Methods': 'cached_http_methods',
- 'Zone ID': 'zone_id',
- 'Record Count': 'record_count',
- 'Domain name': 'domain_name',
- 'Additional names': 'additional_names',
- 'WebACL Name': 'webacl_name',
- 'Scope': 'scope',
- 'Rules Count': 'rules_count',
- 'Associated Resources': 'associated_resources',
- 'Topic Name': 'topic_name',
- 'Topic Display Name': 'display_name',
- 'Subscription Protocol': 'subscription_protocol',
- 'Subscription Endpoint': 'subscription_endpoint',
- 'Log Group Name': 'log_group_name',
- 'Retention Days': 'retention_days',
- 'Stored Bytes': 'stored_bytes',
- 'KMS Encryption': 'kms_encryption',
- 'Description': 'description',
- 'Event Bus': 'event_bus',
- 'State': 'state',
- 'Multi-Region Trail': 'multi_region',
- 'Log File Validation': 'log_file_validation',
- 'Regional Resources': 'regional_resources',
- 'Global Resources': 'global_resources',
- 'Retention period': 'retention_period',
- 'Port range': 'port_range',
- 'Source': 'source',
- 'Other Requirement': 'other_requirement',
- }
-
- return mappings.get(column_name, column_name.lower().replace(' ', '_'))
-
- def _find_implementation_list_section(self) -> Optional[int]:
- """
- Find the index of the 'Implementation List' section in the document.
-
- Returns:
- Index of the paragraph after the Implementation List heading, or None if not found
- """
- for i, paragraph in enumerate(self.document.paragraphs):
- text = paragraph.text.strip().lower()
- # Match variations like "4. Implementation List", "Implementation List", etc.
- if 'implementation list' in text:
- return i
- return None
-
def _copy_table_style_from_template(self, table) -> None:
    """
    Apply consistent table styling matching the template format.

    Sets the 'Table Grid' style when the document defines it, forces the
    table width to 100% and draws single black borders on every edge and
    between cells. Any width/border definition already present on the
    table is replaced rather than duplicated, so calling this repeatedly
    leaves exactly one ``w:tblW`` and one ``w:tblBorders``.

    Args:
        table: The table to style
    """
    try:
        # 'Table Grid' is a standard Word style, but the template may lack it
        table.style = 'Table Grid'
    except Exception:
        # Best effort: explicit borders below still give the grid look
        pass

    tbl = table._tbl
    tblPr = tbl.tblPr
    if tblPr is None:
        tblPr = OxmlElement('w:tblPr')
        tbl.insert(0, tblPr)

    # Drop earlier definitions so they are replaced instead of duplicated
    for tag in ('w:tblW', 'w:tblBorders'):
        for stale in tblPr.findall(qn(tag)):
            tblPr.remove(stale)

    # Set table width to 100% (pct values are in fiftieths of a percent)
    tblW = OxmlElement('w:tblW')
    tblW.set(qn('w:w'), '5000')
    tblW.set(qn('w:type'), 'pct')

    # Set table borders
    tblBorders = OxmlElement('w:tblBorders')
    for border_name in ['top', 'left', 'bottom', 'right', 'insideH', 'insideV']:
        border = OxmlElement(f'w:{border_name}')
        border.set(qn('w:val'), 'single')
        border.set(qn('w:sz'), '4')
        border.set(qn('w:space'), '0')
        border.set(qn('w:color'), '000000')
        tblBorders.append(border)

    # Insert in the child order the WordprocessingML schema expects for
    # w:tblPr, i.e. before whichever later sibling is already present.
    after_borders = (
        'w:shd', 'w:tblLayout', 'w:tblCellMar', 'w:tblLook',
        'w:tblCaption', 'w:tblDescription', 'w:tblPrChange',
    )
    after_width = ('w:jc', 'w:tblCellSpacing', 'w:tblInd', 'w:tblBorders') + after_borders
    tblPr.insert_element_before(tblW, *after_width)
    tblPr.insert_element_before(tblBorders, *after_borders)
-
def _apply_header_cell_style(self, cell, is_title: bool = False) -> None:
    """
    Apply header cell styling (bold, background color, font, spacing).

    Any shading already present on the cell is replaced, so the method can
    be applied to the same cell more than once without stacking ``w:shd``
    elements.

    Args:
        cell: The cell to style
        is_title: If True, use title color (DAEEF3) and 12pt font,
            otherwise use header color (C6D9F1) and 11pt font
    """
    tcPr = cell._tc.get_or_add_tcPr()

    # Remove earlier shading so it is replaced instead of duplicated
    for stale in tcPr.findall(qn('w:shd')):
        tcPr.remove(stale)

    # Service Name title uses DAEEF3 (light cyan), column headers use C6D9F1 (light blue)
    shd = OxmlElement('w:shd')
    shd.set(qn('w:val'), 'clear')
    shd.set(qn('w:color'), 'auto')
    shd.set(qn('w:fill'), 'DAEEF3' if is_title else 'C6D9F1')
    # Keep w:shd ahead of the w:tcPr children that must follow it in the schema
    tcPr.insert_element_before(
        shd,
        'w:noWrap', 'w:tcMar', 'w:textDirection', 'w:tcFitText', 'w:vAlign',
        'w:hideMark', 'w:headers', 'w:cellIns', 'w:cellDel', 'w:cellMerge',
        'w:tcPrChange',
    )

    # Service Name (title) uses 12pt, other header cells use 11pt
    font_size = 12 if is_title else 11
    for paragraph in cell.paragraphs:
        self._apply_cell_paragraph_format(paragraph, font_size=font_size)
        for run in paragraph.runs:
            run.font.bold = True
-
def _apply_cell_paragraph_format(self, paragraph, font_size: int = 11) -> None:
    """
    Give a table-cell paragraph the standard look.

    - Spacing: 3pt before, 3pt after, single line spacing
    - Font: Calibri (also set as the East Asian font) at ``font_size`` points

    Args:
        paragraph: The paragraph to format
        font_size: Font size in points (default 11pt, use 12pt for Service Name)
    """
    from docx.enum.text import WD_LINE_SPACING
    from docx.shared import Pt

    layout = paragraph.paragraph_format
    layout.space_before = Pt(3)
    layout.space_after = Pt(3)
    layout.line_spacing_rule = WD_LINE_SPACING.SINGLE

    for run in paragraph.runs:
        font = run.font
        font.name = 'Calibri'
        font.size = Pt(font_size)
        # The Latin font name does not cover East Asian text; set it explicitly
        run._element.rPr.rFonts.set(qn('w:eastAsia'), 'Calibri')
-
- def _apply_data_cell_style(self, cell) -> None:
- """
- Apply data cell styling (font 11pt, spacing, no background).
-
- Args:
- cell: The cell to style
- """
- for paragraph in cell.paragraphs:
- self._apply_cell_paragraph_format(paragraph, font_size=11)
-
def add_service_tables(self, scan_results: Dict[str, List[Dict[str, Any]]],
                       include_account_column: bool = False,
                       regions: List[str] = None) -> None:
    """
    Add tables for all services with resources, filtering out empty services.
    Content is inserted into the existing 'Implementation List' section in the template,
    replacing any placeholder content. If the template has no such section,
    a new 'Implementation List' Heading 1 is appended and content follows it.

    Services are grouped under their parent service heading (e.g., VPC, ELB, S3).
    When multiple regions are selected, regional services are emitted region by
    region with the region in the heading; global services are then shown once,
    deduplicated, without a region suffix. With zero or one region, services are
    emitted in SERVICE_ORDER and ACM gets a "(us-east-1)" suffix when the single
    selected region is not us-east-1. Within each table resources are sorted by
    account, then by name (unnamed resources last). A page break is added after
    the section.

    Args:
        scan_results: Dictionary mapping service keys to lists of resources
        include_account_column: Whether to include AWS Account column
        regions: List of regions being scanned (for multi-region heading display)

    Raises:
        ValueError: If no document is loaded
    """
    if not self.document:
        raise ValueError("No document loaded. Call load_template() first.")

    # Locate (or create) the section heading that content is inserted after
    impl_list_idx = self._find_implementation_list_section()
    if impl_list_idx is not None:
        # Clear placeholder content after Implementation List until next Heading 1
        self._clear_section_content(impl_list_idx)
        anchor = self.document.paragraphs[impl_list_idx]._element
    else:
        anchor = self.document.add_paragraph('Implementation List', style='Heading 1')._element

    # Insert right after the heading. Using the heading's own position (not
    # the body length) keeps new content in front of the trailing w:sectPr.
    parent = anchor.getparent()
    self._insert_parent = parent
    self._insert_index = list(parent).index(anchor) + 1

    def field(resource: Any, key: str) -> Any:
        """Read a field from the resource itself, else from its 'attributes'."""
        if not isinstance(resource, dict):
            return None
        return resource.get(key) or (resource.get('attributes') or {}).get(key)

    def deduplicate_resources(resources: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
        """Drop repeated resources, keyed by ID or, failing that, by name."""
        seen_ids = set()
        unique_resources = []
        for resource in resources:
            res_id = field(resource, 'id') or field(resource, 'name')
            if res_id:
                if res_id in seen_ids:
                    continue
                seen_ids.add(res_id)
            # Resources without any identifier are always kept
            unique_resources.append(resource)
        return unique_resources

    def sort_key(resource: Dict[str, Any]) -> tuple:
        """Order by account_id, then named before unnamed, then name (case-insensitive)."""
        account_id = str(field(resource, 'account_id') or '')
        name = field(resource, 'name')
        if name and str(name).strip():
            return (account_id, 0, str(name).lower())
        return (account_id, 1, '')

    def add_service_table(service_key: str, resources: List[Dict[str, Any]]) -> None:
        """Emit the table(s) for one service using its configured layout."""
        config = SERVICE_CONFIG.get(service_key)
        if not config or not resources:
            return
        sorted_resources = sorted(resources, key=sort_key)
        if config['layout'] == TableLayout.HORIZONTAL:
            self._add_horizontal_table_at_position(service_key, sorted_resources, include_account_column)
        else:
            self._add_vertical_tables_at_position(service_key, sorted_resources, include_account_column)

    def emit_service(service_key: str, config: Dict[str, Any], resources: List[Dict[str, Any]],
                     seen_groups: set, suffix: str = '') -> None:
        """Add the group's Heading 2 once per scope, then the service's table(s)."""
        service_group = SERVICE_GROUPS.get(service_key, config['title'])
        if service_group not in seen_groups:
            self._add_heading2_at_position(f"{service_group}{suffix}")
            seen_groups.add(service_group)
        add_service_table(service_key, resources)

    if regions and len(regions) > 1:
        # Multi-region mode: organize by region first, then by service group.
        # NOTE(review): resources of regional services that carry no region
        # value match none of the collected regions and are therefore not
        # rendered in this mode - confirm this is intended.

        # Step 1: collect the regions actually present on regional resources
        all_regions = set()
        for service_key in SERVICE_ORDER:
            if service_key in GLOBAL_SERVICES:
                continue
            for resource in scan_results.get(service_key) or []:
                region = field(resource, 'region')
                if region:
                    all_regions.add(region)

        # Caller-supplied order first, then any extra regions alphabetically
        sorted_regions = [r for r in regions if r in all_regions]
        for r in sorted(all_regions):
            if r not in sorted_regions:
                sorted_regions.append(r)

        # Step 2: regional services, one pass over SERVICE_ORDER per region
        for region in sorted_regions:
            added_groups_for_region = set()
            for service_key in SERVICE_ORDER:
                if service_key in GLOBAL_SERVICES:
                    continue
                config = SERVICE_CONFIG.get(service_key)
                if not config:
                    continue
                region_resources = [
                    resource for resource in scan_results.get(service_key) or []
                    if field(resource, 'region') == region
                ]
                if not region_resources:
                    continue
                emit_service(service_key, config, region_resources,
                             added_groups_for_region, suffix=f" ({region})")

        # Step 3: global services, deduplicated, without region suffix
        added_global_groups = set()
        for service_key in SERVICE_ORDER:
            if service_key not in GLOBAL_SERVICES:
                continue
            resources = scan_results.get(service_key, [])
            if not resources:
                continue
            config = SERVICE_CONFIG.get(service_key)
            if not config:
                continue
            unique_resources = deduplicate_resources(resources)
            if not unique_resources:
                continue
            emit_service(service_key, config, unique_resources, added_global_groups)

    else:
        # Single region or no region info
        added_groups = set()

        # ACM shows a (us-east-1) suffix when the one selected region is
        # something other than us-east-1
        acm_needs_region_suffix = (
            regions and
            len(regions) == 1 and
            'us-east-1' not in regions
        )

        for service_key in SERVICE_ORDER:
            resources = scan_results.get(service_key, [])
            if not resources:
                continue
            config = SERVICE_CONFIG.get(service_key)
            if not config:
                continue

            if service_key in GLOBAL_SERVICES:
                resources = deduplicate_resources(resources)
                if not resources:
                    continue

            suffix = " (us-east-1)" if service_key == 'acm' and acm_needs_region_suffix else ''
            emit_service(service_key, config, resources, added_groups, suffix=suffix)

    # Add page break after Implementation List section
    self._add_page_break_at_position()
-
def _add_page_break_at_position(self) -> None:
    """Insert a paragraph holding a single page break at the tracked position."""
    from docx.oxml import OxmlElement
    from docx.oxml.ns import qn

    # Build the carrier paragraph with one run that holds the break
    carrier = self.document.add_paragraph()
    page_break = OxmlElement('w:br')
    page_break.set(qn('w:type'), 'page')
    carrier.add_run()._r.append(page_break)

    # add_paragraph() appended it to the document; relocate it to the cursor
    carrier_element = carrier._element
    carrier_element.getparent().remove(carrier_element)
    self._insert_element_at_position(carrier_element)
-
def _add_heading2_at_position(self, title: str) -> None:
    """
    Insert a 'Heading 2' paragraph, prefixed with a '▼' marker, at the tracked position.

    Args:
        title: The heading title (service group name)
    """
    # Created at the end of the document first, then moved to the cursor
    heading_element = self.document.add_paragraph(f'▼ {title}', style='Heading 2')._element
    heading_element.getparent().remove(heading_element)
    self._insert_element_at_position(heading_element)
-
- def _clear_section_content(self, section_start_idx: int) -> None:
- """
- Clear content between a section heading and the next Heading 1.
-
- Args:
- section_start_idx: Index of the section heading paragraph
- """
- # Find elements to remove (between this Heading 1 and next Heading 1)
- elements_to_remove = []
- body = self.document._body._body
-
- start_para = self.document.paragraphs[section_start_idx]
- start_element = start_para._element
-
- # Find the position of start element in body
- body_children = list(body)
- try:
- start_pos = body_children.index(start_element)
- except ValueError:
- return
-
- # Iterate through elements after the heading
- for i in range(start_pos + 1, len(body_children)):
- elem = body_children[i]
-
- # Check if this is a Heading 1 paragraph (next section)
- if elem.tag.endswith('}p'):
- # Check if it's a Heading 1
- pStyle = elem.find('.//{http://schemas.openxmlformats.org/wordprocessingml/2006/main}pStyle')
- if pStyle is not None:
- style_val = pStyle.get('{http://schemas.openxmlformats.org/wordprocessingml/2006/main}val')
- if style_val and ('Heading1' in style_val or style_val == '1'):
- break
-
- elements_to_remove.append(elem)
-
- # Remove the elements
- for elem in elements_to_remove:
- body.remove(elem)
-
def filter_empty_services(self, scan_results: Dict[str, List[Dict[str, Any]]]) -> Dict[str, List[Dict[str, Any]]]:
    """
    Return a copy of the scan results without services that have no resources.

    Args:
        scan_results: Dictionary mapping service keys to lists of resources

    Returns:
        New dictionary holding only the entries whose resource list is truthy
    """
    non_empty = {}
    for service_key, resources in scan_results.items():
        if resources:
            non_empty[service_key] = resources
    return non_empty
-
def get_services_with_resources(self, scan_results: Dict[str, List[Dict[str, Any]]]) -> List[str]:
    """
    List the service keys that have at least one resource, in SERVICE_ORDER order.

    Args:
        scan_results: Dictionary mapping service keys to lists of resources

    Returns:
        Service keys from SERVICE_ORDER whose scan result is non-empty
    """
    populated = []
    for service_key in SERVICE_ORDER:
        if scan_results.get(service_key):
            populated.append(service_key)
    return populated
-
def replace_architecture_picture_placeholder(self, image_path: str, width_inches: float = 6.0) -> bool:
    """
    Swap the first [AWS Architecture Picture] placeholder for the given image.

    Body paragraphs are searched first, then paragraphs inside table cells.
    In the first paragraph containing the placeholder, all existing runs are
    emptied, the picture is added, any remaining non-blank text of the
    paragraph is re-added after the picture, and the paragraph is centered.
    Only that first occurrence is replaced.

    Args:
        image_path: Path to the architecture diagram image file
        width_inches: Width of the image in inches (default 6.0)

    Returns:
        True if placeholder was found and replaced, False otherwise

    Raises:
        FileNotFoundError: If image file doesn't exist
        ValueError: If no document is loaded
    """
    if not self.document:
        raise ValueError("No document loaded. Call load_template() first.")

    if not os.path.exists(image_path):
        raise FileNotFoundError(f"Image file not found: {image_path}")

    placeholder_text = '[AWS Architecture Picture]'

    def candidate_paragraphs():
        # Body paragraphs first; table cells are only visited if needed
        yield from self.document.paragraphs
        for table in self.document.tables:
            for row in table.rows:
                for cell in row.cells:
                    yield from cell.paragraphs

    for paragraph in candidate_paragraphs():
        if placeholder_text not in paragraph.text:
            continue

        leftover_text = paragraph.text.replace(placeholder_text, '')

        # Blank out the existing runs, then append the picture in a new run
        for run in paragraph.runs:
            run.text = ''
        paragraph.add_run().add_picture(image_path, width=Inches(width_inches))

        # Text that surrounded the placeholder is re-added after the image
        if leftover_text.strip():
            paragraph.add_run(leftover_text)

        paragraph.alignment = WD_ALIGN_PARAGRAPH.CENTER
        return True

    return False
-
def clear_architecture_picture_placeholder(self) -> bool:
    """
    Remove [AWS Architecture Picture] placeholder from the document.

    This method is called when no architecture image is provided,
    to clean up the placeholder text. Body paragraphs and paragraphs in
    table cells are all processed. A placeholder contained in a single run
    is removed from that run. A placeholder split across several runs is
    removed by collapsing the paragraph's run text into its first run and
    emptying the others, so the formatting of the later runs is lost for
    that paragraph.

    Returns:
        True if placeholder was found and removed, False otherwise

    Raises:
        ValueError: If no document is loaded
    """
    if not self.document:
        raise ValueError("No document loaded. Call load_template() first.")

    placeholder_text = '[AWS Architecture Picture]'

    def strip_placeholder(paragraph) -> bool:
        """Remove the placeholder from one paragraph; report whether anything changed."""
        if placeholder_text not in paragraph.text:
            return False

        runs = list(paragraph.runs)
        removed = False
        for run in runs:
            if placeholder_text in run.text:
                run.text = run.text.replace(placeholder_text, '')
                removed = True

        # Word often splits text across runs; handle a placeholder that only
        # appears once the run texts are joined.
        joined = ''.join(run.text for run in runs)
        if placeholder_text in joined:
            runs[0].text = joined.replace(placeholder_text, '')
            for run in runs[1:]:
                run.text = ''
            removed = True

        return removed

    placeholder_found = False

    # Search in paragraphs
    for paragraph in self.document.paragraphs:
        if strip_placeholder(paragraph):
            placeholder_found = True

    # Also search in tables
    for table in self.document.tables:
        for row in table.rows:
            for cell in row.cells:
                for paragraph in cell.paragraphs:
                    if strip_placeholder(paragraph):
                        placeholder_found = True

    return placeholder_found
def embed_network_diagram(self, image_path: str, width_inches: float = 6.0) -> None:
    """
    Embed a network diagram image into the document.

    If no paragraph mentions 'Network Diagram' or 'Network Architecture',
    a 'Network Diagram' Heading 1 is appended first. The image itself is
    always appended at the end of the document (it is not positioned next
    to a matching paragraph), centered, and followed by an empty paragraph.

    Args:
        image_path: Path to the image file
        width_inches: Width of the image in inches

    Raises:
        ValueError: If no document is loaded
        FileNotFoundError: If image file doesn't exist
    """
    if not self.document:
        raise ValueError("No document loaded. Call load_template() first.")

    if not os.path.exists(image_path):
        raise FileNotFoundError(f"Image file not found: {image_path}")

    section_titles = ('Network Diagram', 'Network Architecture')
    has_network_section = any(
        title in paragraph.text
        for paragraph in self.document.paragraphs
        for title in section_titles
    )
    if not has_network_section:
        # Add a new section for network diagram
        self.document.add_paragraph('Network Diagram', style='Heading 1')

    # add_picture() appends a new paragraph holding the image
    self.document.add_picture(image_path, width=Inches(width_inches))
    picture_paragraph = self.document.paragraphs[-1]
    picture_paragraph.alignment = WD_ALIGN_PARAGRAPH.CENTER

    # Trailing empty paragraph for spacing
    self.document.add_paragraph()
-
def update_table_of_contents(self) -> None:
    """
    Flag the document's table of contents for refresh.

    python-docx cannot regenerate a TOC itself; that requires Word. This
    method scans for the first paragraph having a run whose text contains
    'TOC' or 'Table of Contents', hands that paragraph to
    ``_mark_toc_for_update`` and stops.

    NOTE(review): the call to ``_set_update_fields_on_open()`` is currently
    disabled here, so no "update fields on open" setting is written by
    this method.

    Raises:
        ValueError: If no document is loaded
    """
    if not self.document:
        raise ValueError("No document loaded. Call load_template() first.")

    toc_markers = ('TOC', 'Table of Contents')
    for paragraph in self.document.paragraphs:
        looks_like_toc = any(
            marker in run.text
            for run in paragraph.runs
            for marker in toc_markers
        )
        if looks_like_toc:
            # Only the first TOC-looking paragraph is marked
            self._mark_toc_for_update(paragraph)
            return
-
- def _set_update_fields_on_open(self) -> None:
- """
- Set the document to update all fields (including TOC) when opened in Word.
-
- This adds the updateFields setting to the document settings, which causes
- Word to prompt the user to update fields when the document is opened.
- """
- try:
- # Access the document settings element
- settings_element = self.document.settings.element
-
- # Create or find the updateFields element
- # Namespace for Word ML
- w_ns = '{http://schemas.openxmlformats.org/wordprocessingml/2006/main}'
-
- # Check if updateFields already exists
- update_fields = settings_element.find(f'{w_ns}updateFields')
-
- if update_fields is None:
- # Create the updateFields element
- update_fields = OxmlElement('w:updateFields')
- update_fields.set(qn('w:val'), 'true')
- settings_element.append(update_fields)
- else:
- # Ensure it's set to true
- update_fields.set(qn('w:val'), 'true')
-
- except Exception as e:
- # Log but don't fail - TOC update is not critical
- print(f"Warning: Could not set updateFields on open: {e}")
-
- def _mark_toc_for_update(self, paragraph) -> None:
- """
- Mark a TOC paragraph for update when document is opened.
-
- Args:
- paragraph: The TOC paragraph
- """
- # Add updateFields setting to document
- # This will prompt Word to update fields when opened
- try:
- # The updateFields setting is already set in _set_update_fields_on_open
- # This method can be used for additional TOC-specific handling if needed
- pass
- except Exception:
- pass # Settings may not be accessible
-
def add_update_history(self, version: str = '1.0', modifier: str = '', details: str = '') -> None:
    """
    Prepare an entry for the Update History section.

    NOTE(review): this is a stub. It scans for a paragraph mentioning
    'Update History' or 'Revision History' and builds an entry dictionary
    (version, today's date, modifier, details), but nothing is written to
    the document and the entry is not returned.

    Args:
        version: Document version
        modifier: Name of the person who modified
        details: Details of the changes ('Initial version' when empty)

    Raises:
        ValueError: If no document is loaded
    """
    if not self.document:
        raise ValueError("No document loaded. Call load_template() first.")

    # Locate the history section; the match is not used yet
    section_titles = ('Update History', 'Revision History')
    for paragraph in self.document.paragraphs:
        if any(title in paragraph.text for title in section_titles):
            break

    # Entry that a future implementation would add to the history table
    today = datetime.now().strftime('%Y-%m-%d')
    history_entry = {
        'version': version,
        'date': today,
        'modifier': modifier,
        'details': details or 'Initial version'
    }
-
def save(self, output_path: str) -> str:
    """
    Save the document to a file.

    The parent directory of ``output_path`` is created when it does not
    exist. A bare filename (no directory component) is saved relative to
    the current working directory without attempting to create anything.

    Args:
        output_path: Path where to save the document

    Returns:
        The path where the document was saved

    Raises:
        ValueError: If no document has been loaded
    """
    if not self.document:
        raise ValueError("No document loaded. Call load_template() first.")

    # os.path.dirname() returns '' for a bare filename such as "report.docx",
    # and os.makedirs('') raises FileNotFoundError, so only create the parent
    # directory when the path actually has one.
    parent_dir = os.path.dirname(output_path)
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)

    self.document.save(output_path)
    return output_path
-
def get_file_size(self, file_path: str) -> int:
    """
    Report how many bytes a file occupies.

    Args:
        file_path: Path to the file

    Returns:
        File size in bytes
    """
    file_stat = os.stat(file_path)
    return file_stat.st_size
-
def generate_report(self, scan_results: Dict[str, List[Dict[str, Any]]],
                    project_metadata: Dict[str, Any],
                    output_path: str,
                    network_diagram_path: str = None,
                    template_path: str = None,
                    regions: List[str] = None) -> Dict[str, Any]:
    """
    Generate a complete report from scan results.

    Main entry point for report generation. Runs the full pipeline in
    order: load the template, replace placeholders, filter out empty
    services, add the service tables, handle the architecture picture
    placeholder, update the table of contents, add the update history,
    and save the document.

    Args:
        scan_results: Dictionary mapping service keys to lists of resources
        project_metadata: Project metadata for placeholder replacement
        output_path: Path where to save the generated report
        network_diagram_path: Optional path to network diagram image; the
            picture placeholder is cleared when this is missing or the
            file does not exist
        template_path: Optional path to template file
        regions: Optional list of regions being scanned (for multi-region heading display)

    Returns:
        Dictionary with report metadata:
        - file_path: Path to the generated report
        - file_name: Name of the report file
        - file_size: Size of the report in bytes
        - services_included: List of services included in the report
        - accounts_count: Number of distinct account IDs found in the resources
    """
    # Template loading and placeholder substitution
    self.load_template(template_path)
    self.replace_placeholders(
        self.create_project_metadata_replacements(project_metadata)
    )

    # Only services that survive the empty-service filter are reported
    services = self.filter_empty_services(scan_results)

    # Collect distinct account IDs; resources may be plain dicts or objects
    # exposing an account_id attribute (e.g. ResourceData)
    distinct_accounts = set()
    for service_resources in services.values():
        for item in service_resources:
            if not isinstance(item, dict):
                if hasattr(item, 'account_id'):
                    distinct_accounts.add(item.account_id)
                continue
            if 'account_id' in item:
                distinct_accounts.add(item['account_id'])

    # The AWS Account column is requested only for multi-account reports
    multi_account = len(distinct_accounts) > 1
    self.add_service_tables(services, multi_account, regions)

    # Architecture picture: insert the image when it exists, otherwise
    # clear the placeholder
    diagram_available = bool(network_diagram_path) and os.path.exists(network_diagram_path)
    if not diagram_available:
        self.clear_architecture_picture_placeholder()
    else:
        self.replace_architecture_picture_placeholder(network_diagram_path)

    self.update_table_of_contents()

    # Metadata may use camelCase (frontend) or snake_case keys
    engineer = project_metadata.get('cloudEngineer') or project_metadata.get('cloud_engineer', '')
    self.add_update_history(
        version='1.0',
        modifier=engineer,
        details='Initial AWS resource inventory report',
    )

    self.save(output_path)

    # Describe the file that was just written
    size_in_bytes = self.get_file_size(output_path)
    return {
        'file_path': output_path,
        'file_name': os.path.basename(output_path),
        'file_size': size_in_bytes,
        'services_included': list(services),
        'accounts_count': len(distinct_accounts),
    }
def generate_report_filename(project_metadata: Dict[str, Any]) -> str:
    """
    Generate a report filename from project metadata.

    The client and project names are read from either the camelCase keys
    ('clientName', 'projectName', as sent by the frontend) or the snake_case
    keys ('client_name', 'project_name'). Each name is reduced to word
    characters and hyphens, with every whitespace character turned into a
    hyphen. A name that is missing, None, empty, or empty after sanitizing
    falls back to 'Client' / 'Project'.

    Args:
        project_metadata: Project metadata dictionary

    Returns:
        Filename of the form
        '<client>-<project>-Report-<YYYYmmdd_HHMMSS>.docx'
    """
    def _filename_part(raw: Any, fallback: str) -> str:
        # A key that is present but None (or a non-string value) must not
        # reach re.sub(), which would raise TypeError.
        text = '' if raw is None else str(raw)
        text = re.sub(r'[^\w\s-]', '', text).strip()
        # Replace every whitespace character, not only ' ', so tabs and
        # newlines cannot leak into the filename.
        text = re.sub(r'\s', '-', text)
        # Nothing usable left (e.g. the name was only punctuation).
        return text or fallback

    # Support both camelCase (from frontend) and snake_case
    client_name = _filename_part(
        project_metadata.get('clientName') or project_metadata.get('client_name'),
        'Client',
    )
    project_name = _filename_part(
        project_metadata.get('projectName') or project_metadata.get('project_name'),
        'Project',
    )
    timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')

    return f"{client_name}-{project_name}-Report-{timestamp}.docx"
|