cloudshell_scanner.py 129 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
72278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428242924302431243224332434243524362437243824392440244124422443244424452446244724482449245024512452245324542455245624572458245924602461246224632464246524662467246824692470247124722473247424752476247724782479248024812482248324842485248624872488248924902491249224932494249524962497249824992500250125022503250425052506250725082509251025112512251325142515251625172518251925202521252225232524252525262527252825292530253125322533253425352536253725382539254025412542254325442545254625472548254925502551255225532554255525562557255825592560256125622563256425652566256725682569257025712572257325742575257625772578257925802581258225832584258525862587258825892590259125922593259425952596259725982599260026012602260326042605260626072608260926102611261226132614261526162617261826192620262126222623262426252626262726282629263026312632263326342635263626372638263926402641264226432644264526462647264826492650265126522653265426552656265726582659266026612662266326642665266626672668266926702671267226732674267526762677267826792680268126822683268426852686268726882689269026912692269326942695269626972698269927002701270227032704270527062707270827092710271127122713271427152716271727182719272027212722272327242725272627272728272927302731273227332734273527362737273827392740274127422743274427452746274727482749275027512752275327542755275627572758275927602761276227632764276527662767276827692770277127722773277427752776277
72778277927802781278227832784278527862787278827892790279127922793279427952796279727982799280028012802280328042805280628072808280928102811281228132814281528162817281828192820282128222823282428252826282728282829283028312832283328342835283628372838283928402841284228432844284528462847284828492850285128522853285428552856285728582859286028612862286328642865286628672868286928702871287228732874287528762877287828792880288128822883288428852886288728882889289028912892289328942895289628972898289929002901290229032904290529062907290829092910291129122913291429152916291729182919292029212922292329242925292629272928292929302931293229332934293529362937293829392940294129422943294429452946294729482949295029512952295329542955295629572958295929602961296229632964296529662967296829692970297129722973297429752976297729782979298029812982298329842985298629872988298929902991299229932994299529962997299829993000300130023003300430053006300730083009301030113012301330143015301630173018301930203021302230233024302530263027302830293030303130323033303430353036303730383039304030413042304330443045304630473048304930503051305230533054305530563057305830593060306130623063306430653066306730683069307030713072307330743075307630773078307930803081308230833084308530863087308830893090309130923093309430953096309730983099310031013102310331043105310631073108310931103111311231133114311531163117311831193120312131223123312431253126312731283129313031313132313331343135313631373138313931403141314231433144314531463147314831493150315131523153315431553156315731583159316031613162316331643165316631673168316931703171317231733174317531763177317831793180318131823183318431853186318731883189319031913192319331943195319631973198319932003201320232033204320532063207320832093210321132123213321432153216321732183219322032213222322332243225322632273228322932303231323232333234323532363237323832393240324132423243324432453246324732483249325032513252325332543255325632573258325932603261326232633264326532663267326832693270327132723273327432753276327
732783279328032813282328332843285328632873288328932903291329232933294329532963297329832993300330133023303330433053306330733083309331033113312331333143315331633173318331933203321332233233324332533263327332833293330333133323333333433353336333733383339334033413342334333443345334633473348334933503351335233533354335533563357335833593360336133623363336433653366336733683369337033713372337333743375337633773378337933803381338233833384
  1. #!/usr/bin/env python3
  2. """
  3. CloudShell Scanner - AWS Resource Scanner for CloudShell Environment
  4. A standalone Python script that scans AWS resources using CloudShell's IAM credentials.
  5. This script is designed to run in AWS CloudShell without requiring Access Keys.
  6. Requirements:
  7. - 1.1: Single-file Python script, only depends on boto3 and Python standard library
  8. - 1.2: Automatically uses CloudShell environment's IAM credentials
  9. - 1.7: Displays progress information during scanning
  10. Usage:
  11. # Scan all regions
  12. python cloudshell_scanner.py
  13. # Scan specific regions
  14. python cloudshell_scanner.py --regions us-east-1,ap-northeast-1
  15. # Specify output file
  16. python cloudshell_scanner.py --output my_scan.json
  17. # Scan specific services
  18. python cloudshell_scanner.py --services ec2,vpc,rds
  19. """
  20. import argparse
  21. import json
  22. import logging
  23. import sys
  24. import time
  25. from datetime import datetime, timezone
  26. from functools import wraps
  27. from typing import Any, Callable, Dict, List, Optional, TypeVar
  28. import boto3
  29. from botocore.exceptions import BotoCoreError, ClientError
  # Type variable used to annotate the return type of retried callables.
  T = TypeVar("T")

  # Scanner version string.
  __version__ = "1.0.0"

  # Configure root logging once at import time: INFO level, timestamped lines.
  logging.basicConfig(
      level=logging.INFO,
      format="%(asctime)s - %(levelname)s - %(message)s",
      datefmt="%Y-%m-%d %H:%M:%S",
  )
  logger = logging.getLogger(__name__)

  # Exception types that the retry helpers intercept. A ClientError is only
  # retried when its error code is listed in RETRYABLE_ERROR_CODES; the other
  # types are always retried.
  RETRYABLE_EXCEPTIONS = (
      ClientError,
      BotoCoreError,
      ConnectionError,
      TimeoutError,
  )

  # AWS error codes treated as transient. The throttling group follows the
  # codes botocore's "standard" retry mode classifies as throttling, so that
  # services answering with e.g. TooManyRequestsException (Lambda) or SlowDown
  # (S3) are retried instead of being recorded as hard failures.
  RETRYABLE_ERROR_CODES = {
      # Throttling / rate limiting
      "Throttling",
      "ThrottlingException",
      "ThrottledException",
      "RequestThrottled",
      "RequestThrottledException",
      "RequestLimitExceeded",
      "TooManyRequestsException",
      "ProvisionedThroughputExceededException",
      "SlowDown",
      "EC2ThrottledException",
      # Transient service-side failures
      "ServiceUnavailable",
      "InternalError",
      "RequestTimeout",
      "RequestTimeoutException",
  }
  def retry_with_exponential_backoff(
      max_retries: int = 3,
      base_delay: float = 1.0,
      max_delay: float = 30.0,
      exponential_base: float = 2.0,
  ) -> Callable:
      """
      Build a decorator that retries a call with exponentially growing pauses.

      The wrapped function is invoked up to ``max_retries + 1`` times. Only
      exceptions in RETRYABLE_EXCEPTIONS are intercepted; a ClientError whose
      error code is not in RETRYABLE_ERROR_CODES is re-raised immediately.

      Args:
          max_retries: Number of retries after the first attempt (default: 3)
          base_delay: Pause before the first retry, in seconds (default: 1.0)
          max_delay: Upper bound for any single pause, in seconds (default: 30.0)
          exponential_base: Growth factor applied per attempt (default: 2.0)

      Returns:
          A decorator adding the retry behaviour to the function it wraps

      Requirements:
          - 1.8: Record errors and continue scanning other resources
          - Design: Network timeout - retry 3 times with exponential backoff
      """

      def decorator(func: Callable[..., T]) -> Callable[..., T]:
          @wraps(func)
          def wrapper(*args, **kwargs) -> T:
              total_attempts = max_retries + 1
              failure = None

              for attempt_index in range(total_attempts):
                  try:
                      return func(*args, **kwargs)
                  except RETRYABLE_EXCEPTIONS as exc:
                      failure = exc

                      # A ClientError is only worth retrying for transient codes.
                      if isinstance(exc, ClientError):
                          code = exc.response.get("Error", {}).get("Code", "")
                          if code not in RETRYABLE_ERROR_CODES:
                              raise

                      if attempt_index >= max_retries:
                          # Final attempt: report and fall through to the raise below.
                          logger.error(
                              f"All {total_attempts} attempts failed for "
                              f"{func.__name__}: {exc!s}"
                          )
                          break

                      # Pause grows geometrically but never exceeds max_delay.
                      delay = min(
                          base_delay * (exponential_base ** attempt_index),
                          max_delay,
                      )
                      logger.warning(
                          f"Attempt {attempt_index + 1}/{total_attempts} failed for "
                          f"{func.__name__}: {exc!s}. Retrying in {delay:.1f}s..."
                      )
                      time.sleep(delay)

              # Every attempt failed: surface the most recent exception.
              if failure:
                  raise failure

          return wrapper

      return decorator
  def is_retryable_error(exception: Exception) -> bool:
      """
      Tell whether an exception should trigger a retry.

      Args:
          exception: The exception to classify

      Returns:
          For a ClientError, True only when its error code is listed in
          RETRYABLE_ERROR_CODES; for anything else, True when it is an
          instance of one of the RETRYABLE_EXCEPTIONS types
      """
      if not isinstance(exception, ClientError):
          return isinstance(exception, RETRYABLE_EXCEPTIONS)

      # ClientError: the decision depends on the AWS error code alone.
      code = exception.response.get("Error", {}).get("Code", "")
      return code in RETRYABLE_ERROR_CODES
  class ProgressDisplay:
      """
      Console progress reporter for a scan.

      Tracks how many tasks have completed out of a known total and redraws a
      single status line (using a carriage return) with a text progress bar.

      Requirements:
          - 1.7: Displays progress information during scanning
      """

      def __init__(self, total_tasks: int = 0):
          """
          Initialize progress display.

          Args:
              total_tasks: Total number of tasks to track (0 = unknown, no bar)
          """
          self.total_tasks = total_tasks
          self.completed_tasks = 0
          self.current_service = ""
          self.current_region = ""

      def set_total(self, total: int) -> None:
          """Set the total number of tasks and reset the completed counter."""
          self.total_tasks = total
          self.completed_tasks = 0

      def update(self, service: str, region: str, status: str = "scanning") -> None:
          """
          Redraw the status line for the task that is about to run.

          Args:
              service: Current service being scanned
              region: Current region being scanned
              status: Status word shown in front of the service name
          """
          self.current_service = service
          self.current_region = region

          if self.total_tasks > 0:
              raw_percentage = (self.completed_tasks / self.total_tasks) * 100
              # Clamp so an over-counted or reset counter never shows >100% / <0%.
              percentage = max(0.0, min(100.0, raw_percentage))
              progress_bar = self._create_progress_bar(percentage)
              print(
                  f"\r{progress_bar} {percentage:5.1f}% | {status}: {service} in {region}",
                  end="",
                  flush=True,
              )
          else:
              # Total unknown: show only the textual status.
              print(f"\r{status}: {service} in {region}", end="", flush=True)

      def increment(self) -> None:
          """Increment completed tasks counter."""
          self.completed_tasks += 1

      def complete(self, message: str = "Scan completed") -> None:
          """
          Print the final status line, terminated by a newline.

          Args:
              message: Completion message
          """
          if self.total_tasks > 0:
              progress_bar = self._create_progress_bar(100)
              print(f"\r{progress_bar} 100.0% | {message}")
          else:
              print(f"\r{message}")

      def _create_progress_bar(self, percentage: float, width: int = 30) -> str:
          """
          Create a text-based progress bar.

          Args:
              percentage: Completion percentage; values outside 0-100 are
                  clamped so the bar is always exactly ``width`` cells wide
              width: Number of cells between the brackets

          Returns:
              Progress bar string such as ``[████░░░░]``
          """
          clamped = max(0.0, min(100.0, percentage))
          filled = int(width * clamped / 100)
          bar = "█" * filled + "░" * (width - filled)
          return f"[{bar}]"

      def log_error(self, service: str, region: str, error: str) -> None:
          """
          Log an error during scanning.

          Args:
              service: Service that encountered the error
              region: Region where the error occurred
              error: Error message
          """
          # Print newline to avoid overwriting progress bar
          print()
          logger.warning(f"Error scanning {service} in {region}: {error}")
  206. class CloudShellScanner:
  207. """
  208. CloudShell environment AWS resource scanner.
  209. This class provides functionality to scan AWS resources using the IAM credentials
  210. automatically available in the CloudShell environment.
  211. Requirements:
  212. - 1.1: Single-file Python script, only depends on boto3 and Python standard library
  213. - 1.2: Automatically uses CloudShell environment's IAM credentials
  214. - 1.7: Displays progress information during scanning
  215. Attributes:
  216. SUPPORTED_SERVICES: List of all supported AWS services
  217. GLOBAL_SERVICES: List of global services (not region-specific)
  218. """
  219. # All supported AWS services (must match AWSScanner.SUPPORTED_SERVICES)
  220. SUPPORTED_SERVICES: List[str] = [
  221. "vpc", "subnet", "route_table", "internet_gateway", "nat_gateway",
  222. "security_group", "vpc_endpoint", "vpc_peering",
  223. "customer_gateway", "virtual_private_gateway", "vpn_connection",
  224. "ec2", "elastic_ip",
  225. "autoscaling", "elb", "target_group",
  226. "rds", "elasticache",
  227. "eks", "lambda", "s3", "s3_event_notification",
  228. "cloudfront", "route53", "acm", "waf",
  229. "sns", "cloudwatch", "eventbridge", "cloudtrail", "config",
  230. ]
  231. # Global services (not region-specific)
  232. GLOBAL_SERVICES: List[str] = [
  233. "cloudfront", "route53", "waf", "s3", "s3_event_notification", "cloudtrail"
  234. ]
  def __init__(self):
      """
      Prepare scanner state and open a boto3 session.

      No credentials are passed explicitly: the session is created with
      boto3's default credential chain, which is how the CloudShell IAM
      credentials are picked up.

      Raises:
          Exception: Re-raised unchanged if the session cannot be created

      Requirements:
          - 1.2: Automatically uses CloudShell environment's IAM credentials
      """
      self.progress = ProgressDisplay()
      self._session: Optional[boto3.Session] = None
      # Filled lazily by get_account_id().
      self._account_id: Optional[str] = None

      try:
          self._session = boto3.Session()
          logger.info("Initialized CloudShell scanner with default credentials")
      except Exception as exc:
          logger.error(f"Failed to initialize boto3 session: {exc}")
          raise
  253. def get_account_id(self) -> str:
  254. """
  255. Get the current AWS account ID.
  256. Returns:
  257. AWS account ID string
  258. Raises:
  259. Exception: If unable to retrieve account ID
  260. """
  261. if self._account_id:
  262. return self._account_id
  263. try:
  264. sts_client = self._session.client("sts")
  265. response = sts_client.get_caller_identity()
  266. self._account_id = response["Account"]
  267. logger.info(f"Retrieved account ID: {self._account_id}")
  268. return self._account_id
  269. except Exception as e:
  270. logger.error(f"Failed to get account ID: {e}")
  271. raise
  def list_regions(self) -> List[str]:
      """
      Return the region names reported by EC2 ``describe_regions``.

      The lookup is made through an EC2 client pinned to ``us-east-1``. Any
      failure is logged as a warning and the built-in default region list is
      returned instead, so this method does not raise.

      Returns:
          List of region names

      Requirements:
          - 1.4: Scan all available regions when not specified
      """
      try:
          client = self._session.client("ec2", region_name="us-east-1")
          names = []
          for entry in client.describe_regions()["Regions"]:
              names.append(entry["RegionName"])
          logger.info(f"Found {len(names)} available regions")
          return names
      except Exception as exc:
          logger.warning(f"Failed to list regions, using defaults: {exc}")
          # Best effort: fall back to the static list rather than aborting.
          return self._get_default_regions()
  290. def _get_default_regions(self) -> List[str]:
  291. """
  292. Get default AWS regions as fallback.
  293. Returns:
  294. List of default region names
  295. """
  296. return [
  297. "us-east-1", "us-east-2", "us-west-1", "us-west-2",
  298. "eu-west-1", "eu-west-2", "eu-west-3", "eu-central-1",
  299. "ap-northeast-1", "ap-northeast-2", "ap-southeast-1", "ap-southeast-2",
  300. "ap-south-1", "sa-east-1", "ca-central-1",
  301. ]
  def filter_regions(
      self,
      requested_regions: Optional[List[str]] = None,
  ) -> List[str]:
      """
      Filter and validate requested regions against available regions.

      Behaviour:
          - If no regions are specified (None), all available regions are returned
          - Requested names are normalized (whitespace stripped, lowercased)
          - Repeated names are collapsed so a region is never scanned twice
          - Names that are not available are logged and dropped
          - If nothing valid remains, all available regions are returned

      Args:
          requested_regions: List of regions requested by user (None = all regions)

      Returns:
          List of valid region names to scan, in the order first requested

      Requirements:
          - 1.3: Scan only specified regions when provided
          - 1.4: Scan all available regions when not specified
      """
      available_regions = self.list_regions()

      # If no regions specified, return all available regions
      if requested_regions is None:
          logger.info(f"No regions specified, will scan all {len(available_regions)} available regions")
          return available_regions

      available_set = set(available_regions)
      valid_regions: List[str] = []
      invalid_regions: List[str] = []
      seen = set()

      for region in requested_regions:
          # Normalize region name (strip whitespace, lowercase)
          normalized_region = region.strip().lower()

          # Skip repeats: scanning a region twice would duplicate its resources.
          if normalized_region in seen:
              continue
          seen.add(normalized_region)

          if normalized_region in available_set:
              valid_regions.append(normalized_region)
          else:
              # Report the name as the user typed it.
              invalid_regions.append(region)

      if invalid_regions:
          logger.warning(
              f"Ignoring invalid/unavailable regions: {invalid_regions}. "
              f"Available regions: {sorted(available_regions)}"
          )

      # If no valid regions remain, fall back to all available regions
      if not valid_regions:
          logger.warning(
              "No valid regions specified, falling back to all available regions"
          )
          return available_regions

      logger.info(f"Will scan {len(valid_regions)} specified regions: {valid_regions}")
      return valid_regions
  351. def validate_region(self, region: str) -> bool:
  352. """
  353. Validate if a region is available.
  354. Args:
  355. region: Region name to validate
  356. Returns:
  357. True if region is valid, False otherwise
  358. """
  359. try:
  360. available_regions = self.list_regions()
  361. return region.strip().lower() in set(available_regions)
  362. except Exception:
  363. # If we can't validate, assume it's valid and let the API call fail
  364. return True
  def scan_resources(
      self,
      regions: Optional[List[str]] = None,
      services: Optional[List[str]] = None,
  ) -> Dict[str, Any]:
      """
      Scan AWS resources across specified regions and services.

      Service names are normalized (whitespace stripped, lowercased) and
      de-duplicated before validation, mirroring how region names are
      handled, so ``" EC2"`` is accepted and a repeated name is scanned once.
      Unsupported names are logged and ignored.

      Args:
          regions: List of regions to scan (None = all available regions)
          services: List of services to scan (None or empty = all supported services)

      Returns:
          Dictionary with three keys: ``metadata`` (account, timestamp,
          regions/services scanned, version, totals), ``resources``
          (service name -> list of resources) and ``errors`` (list)

      Raises:
          Exception: If the account ID cannot be retrieved

      Requirements:
          - 1.3: Scan only specified regions when provided
          - 1.4: Scan all available regions when not specified
          - 1.5: Scan all supported service types
          - 1.7: Display progress information during scanning
          - 1.8: Record errors and continue scanning other resources
      """
      account_id = self.get_account_id()

      # Filter and validate regions
      regions_to_scan = self.filter_regions(regions)
      logger.info(f"Scanning {len(regions_to_scan)} regions")

      # Determine requested services; dict.fromkeys de-duplicates while
      # keeping first-seen order, and always yields a list we own.
      if services:
          requested_services = list(
              dict.fromkeys(name.strip().lower() for name in services)
          )
      else:
          requested_services = self.SUPPORTED_SERVICES.copy()

      # Validate services
      supported = set(self.SUPPORTED_SERVICES)
      invalid_services = [s for s in requested_services if s not in supported]
      if invalid_services:
          logger.warning(f"Ignoring unsupported services: {invalid_services}")
      services_to_scan = [s for s in requested_services if s in supported]
      # Logged after validation so the count matches what is actually scanned.
      logger.info(f"Scanning {len(services_to_scan)} services")

      # Separate global and regional services
      global_names = set(self.GLOBAL_SERVICES)
      global_services = [s for s in services_to_scan if s in global_names]
      regional_services = [s for s in services_to_scan if s not in global_names]

      # Global services count once; regional services once per region.
      total_tasks = len(global_services) + (len(regional_services) * len(regions_to_scan))
      self.progress.set_total(total_tasks)

      result: Dict[str, Any] = {
          "metadata": {
              "account_id": account_id,
              "scan_timestamp": datetime.now(timezone.utc).isoformat().replace("+00:00", "Z"),
              "regions_scanned": regions_to_scan,
              "services_scanned": services_to_scan,
              "scanner_version": __version__,
              "total_resources": 0,
              "total_errors": 0,
          },
          "resources": {},
          "errors": [],
      }

      # Scan global services first (only once, not per region)
      if global_services:
          logger.info(f"Scanning {len(global_services)} global services")
          self._scan_global_services(
              account_id=account_id,
              services=global_services,
              result=result,
          )

      # Scan regional services
      if regional_services and regions_to_scan:
          logger.info(f"Scanning {len(regional_services)} regional services across {len(regions_to_scan)} regions")
          self._scan_regional_services(
              account_id=account_id,
              regions=regions_to_scan,
              services=regional_services,
              result=result,
          )

      # Update metadata totals
      result["metadata"]["total_resources"] = sum(
          len(resources) for resources in result["resources"].values()
      )
      result["metadata"]["total_errors"] = len(result["errors"])

      self.progress.complete(
          f"Scan completed: {result['metadata']['total_resources']} resources, "
          f"{result['metadata']['total_errors']} errors"
      )
      return result
  444. def _call_with_retry(
  445. self,
  446. func: Callable[..., T],
  447. *args,
  448. max_retries: int = 3,
  449. base_delay: float = 1.0,
  450. **kwargs,
  451. ) -> T:
  452. """
  453. Call a function with retry logic and exponential backoff.
  454. This method wraps API calls with retry logic for transient failures.
  455. Args:
  456. func: Function to call
  457. *args: Positional arguments for the function
  458. max_retries: Maximum number of retry attempts
  459. base_delay: Initial delay in seconds
  460. **kwargs: Keyword arguments for the function
  461. Returns:
  462. Result of the function call
  463. Raises:
  464. Exception: If all retries are exhausted
  465. Requirements:
  466. - 1.8: Record errors and continue scanning other resources
  467. - Design: Network timeout - retry 3 times with exponential backoff
  468. """
  469. last_exception = None
  470. for attempt in range(max_retries + 1):
  471. try:
  472. return func(*args, **kwargs)
  473. except RETRYABLE_EXCEPTIONS as e:
  474. last_exception = e
  475. # Check if it's a retryable error code for ClientError
  476. if isinstance(e, ClientError):
  477. error_code = e.response.get("Error", {}).get("Code", "")
  478. if error_code not in RETRYABLE_ERROR_CODES:
  479. # Not a retryable error, raise immediately
  480. raise
  481. if attempt < max_retries:
  482. # Calculate delay with exponential backoff
  483. delay = min(base_delay * (2 ** attempt), 30.0)
  484. logger.warning(
  485. f"Attempt {attempt + 1}/{max_retries + 1} failed: {str(e)}. "
  486. f"Retrying in {delay:.1f}s..."
  487. )
  488. time.sleep(delay)
  489. else:
  490. logger.error(f"All {max_retries + 1} attempts failed: {str(e)}")
  491. # All retries exhausted, raise the last exception
  492. if last_exception:
  493. raise last_exception
  def _scan_global_services(
      self,
      account_id: str,
      services: List[str],
      result: Dict[str, Any],
  ) -> None:
      """
      Scan each global service once and merge the outcome into ``result``.

      Resources found are appended under ``result["resources"][service]``.
      A failure for one service is recorded in ``result["errors"]`` and
      reported through the progress display; the loop then continues.

      Args:
          account_id: AWS account ID
          services: List of global services to scan
          result: Result dictionary to update in place

      Requirements:
          - 1.8: Record errors and continue scanning other resources
      """
      # Global services are scanned under the pseudo-region "global".
      region_label = "global"

      for service in services:
          self.progress.update(service, region_label, "Scanning")
          try:
              found = self._scan_service(
                  account_id=account_id,
                  region=region_label,
                  service=service,
              )
              if found:
                  result["resources"].setdefault(service, []).extend(found)
          except Exception as exc:
              # Keep the failure details and move on to the next service.
              result["errors"].append(
                  self._create_error_info(
                      service=service,
                      region=region_label,
                      exception=exc,
                  )
              )
              self.progress.log_error(service, region_label, str(exc))
          # Counted whether the scan succeeded or failed.
          self.progress.increment()
  531. def _scan_regional_services(
  532. self,
  533. account_id: str,
  534. regions: List[str],
  535. services: List[str],
  536. result: Dict[str, Any],
  537. ) -> None:
  538. """
  539. Scan regional AWS services.
  540. Args:
  541. account_id: AWS account ID
  542. regions: List of regions to scan
  543. services: List of regional services to scan
  544. result: Result dictionary to update
  545. Requirements:
  546. - 1.8: Record errors and continue scanning other resources
  547. """
  548. for region in regions:
  549. for service in services:
  550. self.progress.update(service, region, "Scanning")
  551. try:
  552. resources = self._scan_service(
  553. account_id=account_id,
  554. region=region,
  555. service=service,
  556. )
  557. if resources:
  558. if service not in result["resources"]:
  559. result["resources"][service] = []
  560. result["resources"][service].extend(resources)
  561. except Exception as e:
  562. # Capture detailed error information
  563. error_info = self._create_error_info(
  564. service=service,
  565. region=region,
  566. exception=e,
  567. )
  568. result["errors"].append(error_info)
  569. self.progress.log_error(service, region, str(e))
  570. self.progress.increment()
  571. def _create_error_info(
  572. self,
  573. service: str,
  574. region: str,
  575. exception: Exception,
  576. ) -> Dict[str, Any]:
  577. """
  578. Create a detailed error information dictionary.
  579. This method extracts detailed information from exceptions to provide
  580. useful error context for debugging and reporting.
  581. Args:
  582. service: Service that encountered the error
  583. region: Region where the error occurred
  584. exception: The exception that was raised
  585. Returns:
  586. Dictionary containing error details
  587. Requirements:
  588. - 1.8: Record errors and continue scanning other resources
  589. - 6.1: Display missing permission information when encountering permission errors
  590. """
  591. error_info: Dict[str, Any] = {
  592. "service": service,
  593. "region": region,
  594. "error": str(exception),
  595. "error_type": type(exception).__name__,
  596. "details": None,
  597. }
  598. # Extract additional details from ClientError
  599. if isinstance(exception, ClientError):
  600. error_response = exception.response.get("Error", {})
  601. error_code = error_response.get("Code", "")
  602. error_message = error_response.get("Message", "")
  603. error_info["details"] = {
  604. "error_code": error_code,
  605. "error_message": error_message,
  606. }
  607. # Check for permission errors and provide helpful information
  608. if error_code in ("AccessDenied", "AccessDeniedException", "UnauthorizedAccess"):
  609. error_info["details"]["permission_hint"] = (
  610. f"Missing IAM permission for {service} in {region}. "
  611. f"Please ensure your IAM role has the necessary permissions."
  612. )
  613. logger.warning(
  614. f"Permission denied for {service} in {region}: {error_message}"
  615. )
  616. # Extract details from BotoCoreError
  617. elif isinstance(exception, BotoCoreError):
  618. error_info["details"] = {
  619. "botocore_error": str(exception),
  620. }
  621. return error_info
  622. def _scan_service(
  623. self,
  624. account_id: str,
  625. region: str,
  626. service: str,
  627. ) -> List[Dict[str, Any]]:
  628. """
  629. Scan a single service in a specific region.
  630. Args:
  631. account_id: AWS account ID
  632. region: Region to scan (or 'global' for global services)
  633. service: Service to scan
  634. Returns:
  635. List of resource dictionaries
  636. Note:
  637. This is a placeholder method. Actual service scanning methods
  638. will be implemented in subsequent tasks (1.2-1.5).
  639. """
  640. # Get the scanner method for this service
  641. scanner_method = self._get_scanner_method(service)
  642. if scanner_method is None:
  643. logger.warning(f"No scanner method found for service: {service}")
  644. return []
  645. # Use us-east-1 for global services
  646. actual_region = "us-east-1" if region == "global" else region
  647. return scanner_method(account_id, actual_region)
  648. def _get_scanner_method(self, service: str) -> Optional[Callable]:
  649. """
  650. Get the scanner method for a specific service.
  651. Args:
  652. service: Service name
  653. Returns:
  654. Scanner method callable or None if not found
  655. """
  656. scanner_methods: Dict[str, Callable] = {
  657. # VPC related services (Task 1.2)
  658. "vpc": self._scan_vpcs,
  659. "subnet": self._scan_subnets,
  660. "route_table": self._scan_route_tables,
  661. "internet_gateway": self._scan_internet_gateways,
  662. "nat_gateway": self._scan_nat_gateways,
  663. "security_group": self._scan_security_groups,
  664. "vpc_endpoint": self._scan_vpc_endpoints,
  665. "vpc_peering": self._scan_vpc_peering,
  666. "customer_gateway": self._scan_customer_gateways,
  667. "virtual_private_gateway": self._scan_virtual_private_gateways,
  668. "vpn_connection": self._scan_vpn_connections,
  669. # EC2 and compute services (Task 1.3)
  670. "ec2": self._scan_ec2_instances,
  671. "elastic_ip": self._scan_elastic_ips,
  672. "autoscaling": self._scan_autoscaling_groups,
  673. "elb": self._scan_load_balancers,
  674. "target_group": self._scan_target_groups,
  675. "lambda": self._scan_lambda_functions,
  676. "eks": self._scan_eks_clusters,
  677. # Database and storage services (Task 1.4)
  678. "rds": self._scan_rds_instances,
  679. "elasticache": self._scan_elasticache_clusters,
  680. "s3": self._scan_s3_buckets,
  681. "s3_event_notification": self._scan_s3_event_notifications,
  682. # Global and monitoring services (Task 1.5)
  683. "cloudfront": self._scan_cloudfront_distributions,
  684. "route53": self._scan_route53_hosted_zones,
  685. "acm": self._scan_acm_certificates,
  686. "waf": self._scan_waf_web_acls,
  687. "sns": self._scan_sns_topics,
  688. "cloudwatch": self._scan_cloudwatch_log_groups,
  689. "eventbridge": self._scan_eventbridge_rules,
  690. "cloudtrail": self._scan_cloudtrail_trails,
  691. "config": self._scan_config_recorders,
  692. }
  693. return scanner_methods.get(service)
  694. def export_json(self, result: Dict[str, Any], output_path: str) -> None:
  695. """
  696. Export scan results to a JSON file.
  697. This method serializes the scan result to a JSON file with proper handling
  698. of non-serializable types (datetime, bytes, sets, etc.).
  699. Args:
  700. result: Scan result dictionary containing metadata, resources, and errors
  701. output_path: Path to output JSON file
  702. Requirements:
  703. - 1.6: Export results as JSON file when scan completes
  704. - 2.1: Include metadata fields (account_id, scan_timestamp, regions_scanned, services_scanned)
  705. - 2.2: Include resources field organized by service type
  706. - 2.3: Include errors field with scan error information
  707. - 2.4: Use JSON format encoding for serialization
  708. Raises:
  709. IOError: If unable to write to the output file
  710. TypeError: If result contains non-serializable types that cannot be converted
  711. """
  712. try:
  713. # Validate the result structure before export
  714. self._validate_scan_data_structure(result)
  715. # Serialize with custom encoder for non-standard types
  716. json_str = json.dumps(
  717. result,
  718. indent=2,
  719. ensure_ascii=False,
  720. default=self._json_serializer,
  721. sort_keys=False,
  722. )
  723. # Write to file
  724. with open(output_path, "w", encoding="utf-8") as f:
  725. f.write(json_str)
  726. logger.info(f"Scan results exported to: {output_path}")
  727. logger.info(
  728. f"Export summary: {result['metadata']['total_resources']} resources, "
  729. f"{result['metadata']['total_errors']} errors"
  730. )
  731. except (IOError, OSError) as e:
  732. logger.error(f"Failed to write to {output_path}: {e}")
  733. raise
  734. except (TypeError, ValueError) as e:
  735. logger.error(f"Failed to serialize scan results: {e}")
  736. raise
  737. def _json_serializer(self, obj: Any) -> Any:
  738. """
  739. Custom JSON serializer for non-standard types.
  740. Handles datetime, date, bytes, sets, and other non-JSON-serializable types.
  741. Args:
  742. obj: Object to serialize
  743. Returns:
  744. JSON-serializable representation of the object
  745. Requirements:
  746. - 2.4: Use JSON format encoding (handle non-serializable types gracefully)
  747. """
  748. # Handle datetime objects - convert to ISO 8601 format
  749. if isinstance(obj, datetime):
  750. # Ensure UTC timezone and proper ISO 8601 format
  751. if obj.tzinfo is None:
  752. obj = obj.replace(tzinfo=timezone.utc)
  753. return obj.isoformat().replace("+00:00", "Z")
  754. # Handle date objects
  755. if hasattr(obj, 'isoformat'):
  756. return obj.isoformat()
  757. # Handle bytes
  758. if isinstance(obj, bytes):
  759. return obj.decode('utf-8', errors='replace')
  760. # Handle sets
  761. if isinstance(obj, set):
  762. return list(obj)
  763. # Handle frozensets
  764. if isinstance(obj, frozenset):
  765. return list(obj)
  766. # Handle objects with __dict__
  767. if hasattr(obj, '__dict__'):
  768. return obj.__dict__
  769. # Fallback to string representation
  770. return str(obj)
  771. def _validate_scan_data_structure(self, data: Dict[str, Any]) -> None:
  772. """
  773. Validate that the scan data structure matches the expected format.
  774. This method ensures the data structure conforms to the ScanData interface
  775. defined in the design document.
  776. Args:
  777. data: Scan data dictionary to validate
  778. Raises:
  779. ValueError: If required fields are missing or have incorrect types
  780. Requirements:
  781. - 2.1: Metadata fields (account_id, scan_timestamp, regions_scanned, services_scanned)
  782. - 2.2: Resources field organized by service type
  783. - 2.3: Errors field with error information
  784. """
  785. # Check top-level structure
  786. required_top_level = ["metadata", "resources", "errors"]
  787. for field in required_top_level:
  788. if field not in data:
  789. raise ValueError(f"Missing required top-level field: {field}")
  790. # Check metadata fields
  791. metadata = data.get("metadata", {})
  792. required_metadata = [
  793. "account_id",
  794. "scan_timestamp",
  795. "regions_scanned",
  796. "services_scanned",
  797. "scanner_version",
  798. "total_resources",
  799. "total_errors",
  800. ]
  801. missing_metadata = [f for f in required_metadata if f not in metadata]
  802. if missing_metadata:
  803. raise ValueError(f"Missing required metadata fields: {missing_metadata}")
  804. # Validate metadata field types
  805. if not isinstance(metadata.get("account_id"), str):
  806. raise ValueError("metadata.account_id must be a string")
  807. if not isinstance(metadata.get("scan_timestamp"), str):
  808. raise ValueError("metadata.scan_timestamp must be a string")
  809. if not isinstance(metadata.get("regions_scanned"), list):
  810. raise ValueError("metadata.regions_scanned must be a list")
  811. if not isinstance(metadata.get("services_scanned"), list):
  812. raise ValueError("metadata.services_scanned must be a list")
  813. if not isinstance(metadata.get("scanner_version"), str):
  814. raise ValueError("metadata.scanner_version must be a string")
  815. if not isinstance(metadata.get("total_resources"), int):
  816. raise ValueError("metadata.total_resources must be an integer")
  817. if not isinstance(metadata.get("total_errors"), int):
  818. raise ValueError("metadata.total_errors must be an integer")
  819. # Validate resources structure
  820. resources = data.get("resources", {})
  821. if not isinstance(resources, dict):
  822. raise ValueError("resources must be a dictionary")
  823. # Validate errors structure
  824. errors = data.get("errors", [])
  825. if not isinstance(errors, list):
  826. raise ValueError("errors must be a list")
  827. @staticmethod
  828. def create_scan_data(
  829. account_id: str,
  830. regions_scanned: List[str],
  831. services_scanned: List[str],
  832. resources: Dict[str, List[Dict[str, Any]]],
  833. errors: List[Dict[str, Any]],
  834. scan_timestamp: Optional[str] = None,
  835. ) -> Dict[str, Any]:
  836. """
  837. Create a properly structured ScanData dictionary.
  838. This is a factory method to create scan data with the correct structure
  839. as defined in the design document.
  840. Args:
  841. account_id: AWS account ID
  842. regions_scanned: List of regions that were scanned
  843. services_scanned: List of services that were scanned
  844. resources: Dictionary of resources organized by service type
  845. errors: List of error dictionaries
  846. scan_timestamp: Optional ISO 8601 timestamp (defaults to current time)
  847. Returns:
  848. Properly structured ScanData dictionary
  849. Requirements:
  850. - 2.1: Include metadata fields
  851. - 2.2: Include resources field organized by service type
  852. - 2.3: Include errors field
  853. """
  854. if scan_timestamp is None:
  855. scan_timestamp = datetime.now(timezone.utc).isoformat().replace("+00:00", "Z")
  856. # Calculate totals
  857. total_resources = sum(len(res_list) for res_list in resources.values())
  858. total_errors = len(errors)
  859. return {
  860. "metadata": {
  861. "account_id": account_id,
  862. "scan_timestamp": scan_timestamp,
  863. "regions_scanned": regions_scanned,
  864. "services_scanned": services_scanned,
  865. "scanner_version": __version__,
  866. "total_resources": total_resources,
  867. "total_errors": total_errors,
  868. },
  869. "resources": resources,
  870. "errors": errors,
  871. }
  872. @staticmethod
  873. def load_scan_data(file_path: str) -> Dict[str, Any]:
  874. """
  875. Load scan data from a JSON file.
  876. This method reads and parses a JSON file containing scan data,
  877. validating its structure.
  878. Args:
  879. file_path: Path to the JSON file to load
  880. Returns:
  881. Parsed scan data dictionary
  882. Raises:
  883. FileNotFoundError: If the file does not exist
  884. json.JSONDecodeError: If the file contains invalid JSON
  885. ValueError: If the JSON structure is invalid
  886. Requirements:
  887. - 2.5: Round-trip consistency (load what was exported)
  888. """
  889. try:
  890. with open(file_path, "r", encoding="utf-8") as f:
  891. data = json.load(f)
  892. # Create a temporary scanner instance to validate
  893. # We use a class method approach to avoid needing AWS credentials
  894. CloudShellScanner._validate_scan_data_structure_static(data)
  895. logger.info(f"Loaded scan data from: {file_path}")
  896. return data
  897. except FileNotFoundError:
  898. logger.error(f"File not found: {file_path}")
  899. raise
  900. except json.JSONDecodeError as e:
  901. logger.error(f"Invalid JSON in {file_path}: {e}")
  902. raise
  903. @staticmethod
  904. def _validate_scan_data_structure_static(data: Dict[str, Any]) -> None:
  905. """
  906. Static version of _validate_scan_data_structure for use without instance.
  907. Args:
  908. data: Scan data dictionary to validate
  909. Raises:
  910. ValueError: If required fields are missing or have incorrect types
  911. """
  912. # Check top-level structure
  913. required_top_level = ["metadata", "resources", "errors"]
  914. for field in required_top_level:
  915. if field not in data:
  916. raise ValueError(f"Missing required top-level field: {field}")
  917. # Check metadata fields
  918. metadata = data.get("metadata", {})
  919. required_metadata = [
  920. "account_id",
  921. "scan_timestamp",
  922. "regions_scanned",
  923. "services_scanned",
  924. "scanner_version",
  925. "total_resources",
  926. "total_errors",
  927. ]
  928. missing_metadata = [f for f in required_metadata if f not in metadata]
  929. if missing_metadata:
  930. raise ValueError(f"Missing required metadata fields: {missing_metadata}")
  931. # Helper method to get resource name from tags
  932. def _get_name_from_tags(
  933. self, tags: Optional[List[Dict[str, str]]], default: str = ""
  934. ) -> str:
  935. """
  936. Extract Name tag value from tags list.
  937. Args:
  938. tags: List of tag dictionaries with 'Key' and 'Value'
  939. default: Default value if Name tag not found
  940. Returns:
  941. Name tag value or default
  942. """
  943. if not tags:
  944. return default
  945. for tag in tags:
  946. if tag.get("Key") == "Name":
  947. return tag.get("Value", default)
  948. return default
  949. # =========================================================================
  950. # VPC Related Service Scanners (Task 1.2)
  951. # =========================================================================
  952. def _scan_vpcs(self, account_id: str, region: str) -> List[Dict[str, Any]]:
  953. """
  954. Scan VPCs in the specified region.
  955. Args:
  956. account_id: AWS account ID
  957. region: Region to scan
  958. Returns:
  959. List of VPC resource dictionaries
  960. Attributes: Region, Name, ID, CIDR
  961. """
  962. resources = []
  963. ec2_client = self._session.client("ec2", region_name=region)
  964. paginator = ec2_client.get_paginator("describe_vpcs")
  965. for page in paginator.paginate():
  966. for vpc in page.get("Vpcs", []):
  967. name = self._get_name_from_tags(vpc.get("Tags", []), vpc["VpcId"])
  968. resources.append({
  969. "account_id": account_id,
  970. "region": region,
  971. "service": "vpc",
  972. "resource_type": "VPC",
  973. "resource_id": vpc["VpcId"],
  974. "name": name,
  975. "attributes": {
  976. "Region": region,
  977. "Name": name,
  978. "ID": vpc["VpcId"],
  979. "CIDR": vpc.get("CidrBlock", ""),
  980. },
  981. })
  982. return resources
  983. def _scan_subnets(self, account_id: str, region: str) -> List[Dict[str, Any]]:
  984. """
  985. Scan Subnets in the specified region.
  986. Args:
  987. account_id: AWS account ID
  988. region: Region to scan
  989. Returns:
  990. List of Subnet resource dictionaries
  991. Attributes: Name, ID, AZ, CIDR
  992. """
  993. resources = []
  994. ec2_client = self._session.client("ec2", region_name=region)
  995. paginator = ec2_client.get_paginator("describe_subnets")
  996. for page in paginator.paginate():
  997. for subnet in page.get("Subnets", []):
  998. name = self._get_name_from_tags(
  999. subnet.get("Tags", []), subnet["SubnetId"]
  1000. )
  1001. resources.append({
  1002. "account_id": account_id,
  1003. "region": region,
  1004. "service": "subnet",
  1005. "resource_type": "Subnet",
  1006. "resource_id": subnet["SubnetId"],
  1007. "name": name,
  1008. "attributes": {
  1009. "Name": name,
  1010. "ID": subnet["SubnetId"],
  1011. "AZ": subnet.get("AvailabilityZone", ""),
  1012. "CIDR": subnet.get("CidrBlock", ""),
  1013. },
  1014. })
  1015. return resources
  1016. def _scan_route_tables(self, account_id: str, region: str) -> List[Dict[str, Any]]:
  1017. """
  1018. Scan Route Tables in the specified region.
  1019. Args:
  1020. account_id: AWS account ID
  1021. region: Region to scan
  1022. Returns:
  1023. List of Route Table resource dictionaries
  1024. Attributes: Name, ID, Subnet Associations
  1025. """
  1026. resources = []
  1027. ec2_client = self._session.client("ec2", region_name=region)
  1028. paginator = ec2_client.get_paginator("describe_route_tables")
  1029. for page in paginator.paginate():
  1030. for rt in page.get("RouteTables", []):
  1031. name = self._get_name_from_tags(
  1032. rt.get("Tags", []), rt["RouteTableId"]
  1033. )
  1034. # Get subnet associations
  1035. associations = []
  1036. for assoc in rt.get("Associations", []):
  1037. if assoc.get("SubnetId"):
  1038. associations.append(assoc["SubnetId"])
  1039. resources.append({
  1040. "account_id": account_id,
  1041. "region": region,
  1042. "service": "route_table",
  1043. "resource_type": "Route Table",
  1044. "resource_id": rt["RouteTableId"],
  1045. "name": name,
  1046. "attributes": {
  1047. "Name": name,
  1048. "ID": rt["RouteTableId"],
  1049. "Subnet Associations": ", ".join(associations) if associations else "None",
  1050. },
  1051. })
  1052. return resources
  1053. def _scan_internet_gateways(
  1054. self, account_id: str, region: str
  1055. ) -> List[Dict[str, Any]]:
  1056. """
  1057. Scan Internet Gateways in the specified region.
  1058. Args:
  1059. account_id: AWS account ID
  1060. region: Region to scan
  1061. Returns:
  1062. List of Internet Gateway resource dictionaries
  1063. Attributes: Name, ID
  1064. """
  1065. resources = []
  1066. ec2_client = self._session.client("ec2", region_name=region)
  1067. paginator = ec2_client.get_paginator("describe_internet_gateways")
  1068. for page in paginator.paginate():
  1069. for igw in page.get("InternetGateways", []):
  1070. igw_id = igw["InternetGatewayId"]
  1071. name = self._get_name_from_tags(igw.get("Tags", []), igw_id)
  1072. resources.append({
  1073. "account_id": account_id,
  1074. "region": region,
  1075. "service": "internet_gateway",
  1076. "resource_type": "Internet Gateway",
  1077. "resource_id": igw_id,
  1078. "name": name,
  1079. "attributes": {
  1080. "Name": name,
  1081. "ID": igw_id,
  1082. },
  1083. })
  1084. return resources
  1085. def _scan_nat_gateways(self, account_id: str, region: str) -> List[Dict[str, Any]]:
  1086. """
  1087. Scan NAT Gateways in the specified region.
  1088. Args:
  1089. account_id: AWS account ID
  1090. region: Region to scan
  1091. Returns:
  1092. List of NAT Gateway resource dictionaries
  1093. Attributes: Name, ID, Public IP, Private IP
  1094. """
  1095. resources = []
  1096. ec2_client = self._session.client("ec2", region_name=region)
  1097. paginator = ec2_client.get_paginator("describe_nat_gateways")
  1098. for page in paginator.paginate():
  1099. for nat in page.get("NatGateways", []):
  1100. # Skip deleted NAT gateways
  1101. if nat.get("State") == "deleted":
  1102. continue
  1103. name = self._get_name_from_tags(
  1104. nat.get("Tags", []), nat["NatGatewayId"]
  1105. )
  1106. # Get IP addresses from addresses
  1107. public_ip = ""
  1108. private_ip = ""
  1109. for addr in nat.get("NatGatewayAddresses", []):
  1110. if addr.get("PublicIp"):
  1111. public_ip = addr["PublicIp"]
  1112. if addr.get("PrivateIp"):
  1113. private_ip = addr["PrivateIp"]
  1114. resources.append({
  1115. "account_id": account_id,
  1116. "region": region,
  1117. "service": "nat_gateway",
  1118. "resource_type": "NAT Gateway",
  1119. "resource_id": nat["NatGatewayId"],
  1120. "name": name,
  1121. "attributes": {
  1122. "Name": name,
  1123. "ID": nat["NatGatewayId"],
  1124. "Public IP": public_ip,
  1125. "Private IP": private_ip,
  1126. },
  1127. })
  1128. return resources
  1129. def _scan_security_groups(
  1130. self, account_id: str, region: str
  1131. ) -> List[Dict[str, Any]]:
  1132. """
  1133. Scan Security Groups in the specified region.
  1134. Args:
  1135. account_id: AWS account ID
  1136. region: Region to scan
  1137. Returns:
  1138. List of Security Group resource dictionaries
  1139. Attributes: Name, ID, Protocol, Port range, Source
  1140. Note: Creates one entry per inbound rule
  1141. """
  1142. resources = []
  1143. ec2_client = self._session.client("ec2", region_name=region)
  1144. paginator = ec2_client.get_paginator("describe_security_groups")
  1145. for page in paginator.paginate():
  1146. for sg in page.get("SecurityGroups", []):
  1147. sg_name = sg.get("GroupName", sg["GroupId"])
  1148. # Process inbound rules
  1149. for rule in sg.get("IpPermissions", []):
  1150. protocol = rule.get("IpProtocol", "-1")
  1151. if protocol == "-1":
  1152. protocol = "All"
  1153. # Get port range
  1154. from_port = rule.get("FromPort", "All")
  1155. to_port = rule.get("ToPort", "All")
  1156. if from_port == to_port:
  1157. port_range = str(from_port) if from_port != "All" else "All"
  1158. else:
  1159. port_range = f"{from_port}-{to_port}"
  1160. # Get sources
  1161. sources = []
  1162. for ip_range in rule.get("IpRanges", []):
  1163. sources.append(ip_range.get("CidrIp", ""))
  1164. for ip_range in rule.get("Ipv6Ranges", []):
  1165. sources.append(ip_range.get("CidrIpv6", ""))
  1166. for group in rule.get("UserIdGroupPairs", []):
  1167. sources.append(group.get("GroupId", ""))
  1168. source = ", ".join(sources) if sources else "N/A"
  1169. resources.append({
  1170. "account_id": account_id,
  1171. "region": region,
  1172. "service": "security_group",
  1173. "resource_type": "Security Group",
  1174. "resource_id": sg["GroupId"],
  1175. "name": sg_name,
  1176. "attributes": {
  1177. "Name": sg_name,
  1178. "ID": sg["GroupId"],
  1179. "Protocol": protocol,
  1180. "Port range": port_range,
  1181. "Source": source,
  1182. },
  1183. })
  1184. # If no inbound rules, still add the security group
  1185. if not sg.get("IpPermissions"):
  1186. resources.append({
  1187. "account_id": account_id,
  1188. "region": region,
  1189. "service": "security_group",
  1190. "resource_type": "Security Group",
  1191. "resource_id": sg["GroupId"],
  1192. "name": sg_name,
  1193. "attributes": {
  1194. "Name": sg_name,
  1195. "ID": sg["GroupId"],
  1196. "Protocol": "N/A",
  1197. "Port range": "N/A",
  1198. "Source": "N/A",
  1199. },
  1200. })
  1201. return resources
  1202. def _scan_vpc_endpoints(
  1203. self, account_id: str, region: str
  1204. ) -> List[Dict[str, Any]]:
  1205. """
  1206. Scan VPC Endpoints in the specified region.
  1207. Args:
  1208. account_id: AWS account ID
  1209. region: Region to scan
  1210. Returns:
  1211. List of VPC Endpoint resource dictionaries
  1212. Attributes: Name, ID, VPC, Service Name, Type
  1213. """
  1214. resources = []
  1215. ec2_client = self._session.client("ec2", region_name=region)
  1216. paginator = ec2_client.get_paginator("describe_vpc_endpoints")
  1217. for page in paginator.paginate():
  1218. for endpoint in page.get("VpcEndpoints", []):
  1219. name = self._get_name_from_tags(
  1220. endpoint.get("Tags", []), endpoint["VpcEndpointId"]
  1221. )
  1222. resources.append({
  1223. "account_id": account_id,
  1224. "region": region,
  1225. "service": "vpc_endpoint",
  1226. "resource_type": "Endpoint",
  1227. "resource_id": endpoint["VpcEndpointId"],
  1228. "name": name,
  1229. "attributes": {
  1230. "Name": name,
  1231. "ID": endpoint["VpcEndpointId"],
  1232. "VPC": endpoint.get("VpcId", ""),
  1233. "Service Name": endpoint.get("ServiceName", ""),
  1234. "Type": endpoint.get("VpcEndpointType", ""),
  1235. },
  1236. })
  1237. return resources
  1238. def _scan_vpc_peering(self, account_id: str, region: str) -> List[Dict[str, Any]]:
  1239. """
  1240. Scan VPC Peering Connections in the specified region.
  1241. Args:
  1242. account_id: AWS account ID
  1243. region: Region to scan
  1244. Returns:
  1245. List of VPC Peering resource dictionaries
  1246. Attributes: Name, Peering Connection ID, Requester VPC, Accepter VPC
  1247. """
  1248. resources = []
  1249. ec2_client = self._session.client("ec2", region_name=region)
  1250. paginator = ec2_client.get_paginator("describe_vpc_peering_connections")
  1251. for page in paginator.paginate():
  1252. for peering in page.get("VpcPeeringConnections", []):
  1253. # Skip deleted/rejected peerings
  1254. status = peering.get("Status", {}).get("Code", "")
  1255. if status in ["deleted", "rejected", "failed"]:
  1256. continue
  1257. name = self._get_name_from_tags(
  1258. peering.get("Tags", []), peering["VpcPeeringConnectionId"]
  1259. )
  1260. requester_vpc = peering.get("RequesterVpcInfo", {}).get("VpcId", "")
  1261. accepter_vpc = peering.get("AccepterVpcInfo", {}).get("VpcId", "")
  1262. resources.append({
  1263. "account_id": account_id,
  1264. "region": region,
  1265. "service": "vpc_peering",
  1266. "resource_type": "VPC Peering",
  1267. "resource_id": peering["VpcPeeringConnectionId"],
  1268. "name": name,
  1269. "attributes": {
  1270. "Name": name,
  1271. "Peering Connection ID": peering["VpcPeeringConnectionId"],
  1272. "Requester VPC": requester_vpc,
  1273. "Accepter VPC": accepter_vpc,
  1274. },
  1275. })
  1276. return resources
  1277. def _scan_customer_gateways(
  1278. self, account_id: str, region: str
  1279. ) -> List[Dict[str, Any]]:
  1280. """
  1281. Scan Customer Gateways in the specified region.
  1282. Args:
  1283. account_id: AWS account ID
  1284. region: Region to scan
  1285. Returns:
  1286. List of Customer Gateway resource dictionaries
  1287. Attributes: Name, Customer Gateway ID, IP Address
  1288. """
  1289. resources = []
  1290. ec2_client = self._session.client("ec2", region_name=region)
  1291. response = ec2_client.describe_customer_gateways()
  1292. for cgw in response.get("CustomerGateways", []):
  1293. # Skip deleted gateways
  1294. if cgw.get("State") == "deleted":
  1295. continue
  1296. name = self._get_name_from_tags(
  1297. cgw.get("Tags", []), cgw["CustomerGatewayId"]
  1298. )
  1299. resources.append({
  1300. "account_id": account_id,
  1301. "region": region,
  1302. "service": "customer_gateway",
  1303. "resource_type": "Customer Gateway",
  1304. "resource_id": cgw["CustomerGatewayId"],
  1305. "name": name,
  1306. "attributes": {
  1307. "Name": name,
  1308. "Customer Gateway ID": cgw["CustomerGatewayId"],
  1309. "IP Address": cgw.get("IpAddress", ""),
  1310. },
  1311. })
  1312. return resources
  1313. def _scan_virtual_private_gateways(
  1314. self, account_id: str, region: str
  1315. ) -> List[Dict[str, Any]]:
  1316. """
  1317. Scan Virtual Private Gateways in the specified region.
  1318. Args:
  1319. account_id: AWS account ID
  1320. region: Region to scan
  1321. Returns:
  1322. List of Virtual Private Gateway resource dictionaries
  1323. Attributes: Name, Virtual Private Gateway ID, VPC
  1324. """
  1325. resources = []
  1326. ec2_client = self._session.client("ec2", region_name=region)
  1327. response = ec2_client.describe_vpn_gateways()
  1328. for vgw in response.get("VpnGateways", []):
  1329. # Skip deleted gateways
  1330. if vgw.get("State") == "deleted":
  1331. continue
  1332. name = self._get_name_from_tags(
  1333. vgw.get("Tags", []), vgw["VpnGatewayId"]
  1334. )
  1335. # Get attached VPC
  1336. vpc_id = ""
  1337. for attachment in vgw.get("VpcAttachments", []):
  1338. if attachment.get("State") == "attached":
  1339. vpc_id = attachment.get("VpcId", "")
  1340. break
  1341. resources.append({
  1342. "account_id": account_id,
  1343. "region": region,
  1344. "service": "virtual_private_gateway",
  1345. "resource_type": "Virtual Private Gateway",
  1346. "resource_id": vgw["VpnGatewayId"],
  1347. "name": name,
  1348. "attributes": {
  1349. "Name": name,
  1350. "Virtual Private Gateway ID": vgw["VpnGatewayId"],
  1351. "VPC": vpc_id,
  1352. },
  1353. })
  1354. return resources
  def _scan_vpn_connections(
      self, account_id: str, region: str
  ) -> List[Dict[str, Any]]:
      """
      Scan VPN Connections in the specified region.

      Issues a single ``describe_vpn_connections`` call. Connections whose
      state is ``deleted`` are skipped. Routes are reported as a
      comma-separated list of destination CIDR blocks, or ``N/A`` when the
      connection has none.

      Args:
          account_id: AWS account ID
          region: Region to scan

      Returns:
          List of VPN Connection resource dictionaries
          Attributes: Name, VPN ID, Routes
      """
      client = self._session.client("ec2", region_name=region)
      connections = client.describe_vpn_connections().get("VpnConnections", [])

      found = []
      for connection in connections:
          if connection.get("State") == "deleted":
              continue

          connection_id = connection["VpnConnectionId"]
          display_name = self._get_name_from_tags(
              connection.get("Tags", []), connection_id
          )

          # Only route entries that actually carry a CIDR block are listed.
          cidr_blocks = [
              entry["DestinationCidrBlock"]
              for entry in connection.get("Routes", [])
              if entry.get("DestinationCidrBlock")
          ]
          route_summary = ", ".join(cidr_blocks) if cidr_blocks else "N/A"

          found.append({
              "account_id": account_id,
              "region": region,
              "service": "vpn_connection",
              "resource_type": "VPN Connection",
              "resource_id": connection_id,
              "name": display_name,
              "attributes": {
                  "Name": display_name,
                  "VPN ID": connection_id,
                  "Routes": route_summary,
              },
          })
      return found
  1396. # =========================================================================
  1397. # EC2 and Compute Service Scanners (Task 1.3)
  1398. # =========================================================================
  def _scan_ec2_instances(
      self, account_id: str, region: str
  ) -> List[Dict[str, Any]]:
      """
      Scan EC2 Instances in the specified region.

      Walks every page of ``describe_instances`` and skips instances in the
      ``terminated`` state. EBS details come from one ``describe_volumes``
      call for the first block device that has a volume ID; later volumes
      are not inspected. A failed volume lookup is logged and leaves the
      EBS fields empty.

      Args:
          account_id: AWS account ID
          region: Region to scan

      Returns:
          List of EC2 Instance resource dictionaries
          Attributes: Name, Instance ID, Instance Type, AZ, AMI,
          Public IP, Public DNS, Private IP, VPC ID, Subnet ID,
          Key, Security Groups, EBS Type, EBS Size, Encryption
      """
      client = self._session.client("ec2", region_name=region)
      inventory = []

      for page in client.get_paginator("describe_instances").paginate():
          for reservation in page.get("Reservations", []):
              for inst in reservation.get("Instances", []):
                  if inst.get("State", {}).get("Name", "") == "terminated":
                      continue

                  instance_id = inst["InstanceId"]
                  display_name = self._get_name_from_tags(
                      inst.get("Tags", []), instance_id
                  )

                  # Group name when available, otherwise the group ID.
                  group_labels = [
                      group.get("GroupName", group.get("GroupId", ""))
                      for group in inst.get("SecurityGroups", [])
                  ]

                  volume_type = ""
                  volume_size = ""
                  volume_encrypted = ""
                  for mapping in inst.get("BlockDeviceMappings", []):
                      volume_id = mapping.get("Ebs", {}).get("VolumeId")
                      if not volume_id:
                          continue
                      try:
                          lookup = client.describe_volumes(VolumeIds=[volume_id])
                          if lookup.get("Volumes"):
                              first_volume = lookup["Volumes"][0]
                              volume_type = first_volume.get("VolumeType", "")
                              volume_size = f"{first_volume.get('Size', '')} GB"
                              if first_volume.get("Encrypted"):
                                  volume_encrypted = "Yes"
                              else:
                                  volume_encrypted = "No"
                      except Exception as exc:
                          logger.warning(
                              f"Failed to get volume details: {str(exc)}"
                          )
                      # Only the first EBS volume is reported.
                      break

                  placement = inst.get("Placement", {})
                  inventory.append({
                      "account_id": account_id,
                      "region": region,
                      "service": "ec2",
                      "resource_type": "Instance",
                      "resource_id": instance_id,
                      "name": display_name,
                      "attributes": {
                          "Name": display_name,
                          "Instance ID": instance_id,
                          "Instance Type": inst.get("InstanceType", ""),
                          "AZ": placement.get("AvailabilityZone", ""),
                          "AMI": inst.get("ImageId", ""),
                          "Public IP": inst.get("PublicIpAddress", ""),
                          "Public DNS": inst.get("PublicDnsName", ""),
                          "Private IP": inst.get("PrivateIpAddress", ""),
                          "VPC ID": inst.get("VpcId", ""),
                          "Subnet ID": inst.get("SubnetId", ""),
                          "Key": inst.get("KeyName", ""),
                          "Security Groups": ", ".join(group_labels),
                          "EBS Type": volume_type,
                          "EBS Size": volume_size,
                          "Encryption": volume_encrypted,
                          "Other Requirement": "",
                      },
                  })
      return inventory
  def _scan_elastic_ips(
      self, account_id: str, region: str
  ) -> List[Dict[str, Any]]:
      """
      Scan Elastic IPs in the specified region.

      Issues a single ``describe_addresses`` call. The resource ID is the
      allocation ID when the address has one, otherwise the public IP. The
      value handed to the tag helper as its second argument is the public
      IP, or the allocation ID when the public IP is empty.

      Args:
          account_id: AWS account ID
          region: Region to scan

      Returns:
          List of Elastic IP resource dictionaries
          Attributes: Name, Elastic IP
      """
      client = self._session.client("ec2", region_name=region)
      addresses = client.describe_addresses().get("Addresses", [])

      def as_resource(address):
          # Build the inventory record for one address entry.
          ip = address.get("PublicIp", "")
          label = self._get_name_from_tags(
              address.get("Tags", []),
              ip or address.get("AllocationId", ""),
          )
          return {
              "account_id": account_id,
              "region": region,
              "service": "elastic_ip",
              "resource_type": "Elastic IP",
              "resource_id": address.get("AllocationId", ip),
              "name": label,
              "attributes": {
                  "Name": label,
                  "Elastic IP": ip,
              },
          }

      return [as_resource(address) for address in addresses]
  def _scan_autoscaling_groups(
      self, account_id: str, region: str
  ) -> List[Dict[str, Any]]:
      """
      Scan Auto Scaling Groups in the specified region.

      The launch source of each group is resolved from, in turn, a direct
      launch template, the launch template of a mixed instances policy,
      and a legacy launch configuration (reported as ``LC: <name>``). AMI,
      instance type and key are read from whichever source applies. For a
      mixed instances policy, instance types listed under ``Overrides``
      replace the template's instance type. Lookup failures are logged and
      leave the affected fields empty; they never abort the scan.

      Args:
          account_id: AWS account ID
          region: Region to scan

      Returns:
          List of Auto Scaling Group resource dictionaries
          Attributes: Name, Launch Template, AMI, Instance type, Key, Target Groups,
          Desired, Min, Max, Scaling Policy
      """
      resources = []
      asg_client = self._session.client("autoscaling", region_name=region)
      ec2_client = self._session.client("ec2", region_name=region)

      def describe_template(spec):
          # Return (name, ami, instance_type, key_name) for a launch
          # template specification; details stay "" when lookup fails.
          template_id = spec.get("LaunchTemplateId", "")
          template_name = spec.get("LaunchTemplateName", template_id)
          ami = instance_type = key_name = ""

          if template_id:
              selector = {"LaunchTemplateId": template_id}
          elif spec.get("LaunchTemplateName"):
              selector = {"LaunchTemplateName": spec["LaunchTemplateName"]}
          else:
              # Nothing to look up with.
              return template_name, ami, instance_type, key_name

          try:
              lt_response = ec2_client.describe_launch_template_versions(
                  # Auto Scaling uses the template's default version when
                  # the specification does not name one.
                  Versions=[spec.get("Version") or "$Default"],
                  **selector,
              )
              versions = lt_response.get("LaunchTemplateVersions")
              if versions:
                  lt_data = versions[0].get("LaunchTemplateData", {})
                  ami = lt_data.get("ImageId", "")
                  instance_type = lt_data.get("InstanceType", "")
                  key_name = lt_data.get("KeyName", "")
          except Exception as e:
              logger.warning(
                  f"Failed to get launch template details: {str(e)}"
              )
          return template_name, ami, instance_type, key_name

      paginator = asg_client.get_paginator("describe_auto_scaling_groups")
      for page in paginator.paginate():
          for asg in page.get("AutoScalingGroups", []):
              name = asg.get("AutoScalingGroupName", "")
              launch_template_name = ""
              ami = ""
              instance_type = ""
              key_name = ""

              # Direct launch template
              lt = asg.get("LaunchTemplate")
              if lt:
                  (launch_template_name, ami,
                   instance_type, key_name) = describe_template(lt)

              # Mixed instances policy: same lookup on its template spec
              mip_template = (
                  asg.get("MixedInstancesPolicy") or {}
              ).get("LaunchTemplate") or {}
              lt_spec = mip_template.get("LaunchTemplateSpecification") or {}
              if lt_spec:
                  (launch_template_name, ami,
                   instance_type, key_name) = describe_template(lt_spec)
                  override_types = [
                      override["InstanceType"]
                      for override in mip_template.get("Overrides") or []
                      if override.get("InstanceType")
                  ]
                  if override_types:
                      instance_type = ", ".join(override_types)

              # Launch configuration (legacy), only when no template matched
              lc_name = asg.get("LaunchConfigurationName")
              if lc_name and not launch_template_name:
                  launch_template_name = f"LC: {lc_name}"
                  try:
                      lc_response = asg_client.describe_launch_configurations(
                          LaunchConfigurationNames=[lc_name]
                      )
                      if lc_response.get("LaunchConfigurations"):
                          lc = lc_response["LaunchConfigurations"][0]
                          ami = lc.get("ImageId", "")
                          instance_type = lc.get("InstanceType", "")
                          key_name = lc.get("KeyName", "")
                  except Exception as e:
                      logger.warning(
                          f"Failed to get launch configuration details: {str(e)}"
                      )

              # The target group name is the second-to-last "/" segment of
              # its ARN; an ARN without "/" is reported unchanged.
              target_groups = [
                  tg_arn.split("/")[-2] if "/" in tg_arn else tg_arn
                  for tg_arn in asg.get("TargetGroupARNs", [])
              ]

              # Paginated so long policy lists are not cut off at one page.
              scaling_policies = []
              try:
                  policy_pages = asg_client.get_paginator(
                      "describe_policies"
                  ).paginate(AutoScalingGroupName=name)
                  for policy_page in policy_pages:
                      for policy in policy_page.get("ScalingPolicies", []):
                          scaling_policies.append(policy.get("PolicyName", ""))
              except Exception as e:
                  logger.warning(f"Failed to get scaling policies: {str(e)}")

              resources.append({
                  "account_id": account_id,
                  "region": region,
                  "service": "autoscaling",
                  "resource_type": "Auto Scaling Group",
                  "resource_id": asg.get("AutoScalingGroupARN", name),
                  "name": name,
                  "attributes": {
                      "Name": name,
                      "Launch Template": launch_template_name,
                      "AMI": ami,
                      "Instance type": instance_type,
                      "Key": key_name,
                      "Target Groups": (
                          ", ".join(target_groups) if target_groups else "N/A"
                      ),
                      "Desired": str(asg.get("DesiredCapacity", 0)),
                      "Min": str(asg.get("MinSize", 0)),
                      "Max": str(asg.get("MaxSize", 0)),
                      "Scaling Policy": (
                          ", ".join(scaling_policies) if scaling_policies else "N/A"
                      ),
                  },
              })
      return resources
  def _scan_load_balancers(
      self, account_id: str, region: str
  ) -> List[Dict[str, Any]]:
      """
      Scan Load Balancers (ALB, NLB, CLB) in the specified region.

      Two independent passes are made: one over the ``elbv2`` API and one
      over the classic ``elb`` API. A failure in either pass is logged as a
      warning and does not discard what was collected before it.

      Args:
          account_id: AWS account ID
          region: Region to scan

      Returns:
          List of Load Balancer resource dictionaries
          Attributes: Name, Type, DNS, Scheme, VPC, Availability Zones, Subnet,
          Security Groups
      """
      collected = []

      # Pass 1: elbv2 load balancers
      v2_client = self._session.client("elbv2", region_name=region)
      try:
          for page in v2_client.get_paginator("describe_load_balancers").paginate():
              for balancer in page.get("LoadBalancers", []):
                  balancer_name = balancer.get("LoadBalancerName", "")
                  kind = balancer.get("Type", "application")

                  zones = balancer.get("AvailabilityZones", [])
                  zone_names = [zone.get("ZoneName", "") for zone in zones]
                  subnet_ids = [
                      zone["SubnetId"] for zone in zones if zone.get("SubnetId")
                  ]

                  group_ids = balancer.get("SecurityGroups", [])
                  if group_ids:
                      group_summary = ", ".join(group_ids)
                  else:
                      group_summary = "N/A"

                  collected.append({
                      "account_id": account_id,
                      "region": region,
                      "service": "elb",
                      "resource_type": "Load Balancer",
                      "resource_id": balancer.get("LoadBalancerArn", balancer_name),
                      "name": balancer_name,
                      "attributes": {
                          "Name": balancer_name,
                          "Type": kind.upper(),
                          "DNS": balancer.get("DNSName", ""),
                          "Scheme": balancer.get("Scheme", ""),
                          "VPC": balancer.get("VpcId", ""),
                          "Availability Zones": ", ".join(zone_names),
                          "Subnet": ", ".join(subnet_ids),
                          "Security Groups": group_summary,
                      },
                  })
      except Exception as exc:
          logger.warning(f"Failed to scan ALB/NLB: {str(exc)}")

      # Pass 2: Classic Load Balancers
      classic_client = self._session.client("elb", region_name=region)
      try:
          for page in classic_client.get_paginator("describe_load_balancers").paginate():
              for balancer in page.get("LoadBalancerDescriptions", []):
                  balancer_name = balancer.get("LoadBalancerName", "")
                  zone_summary = ", ".join(balancer.get("AvailabilityZones", []))
                  subnet_summary = ", ".join(balancer.get("Subnets", []))
                  group_summary = ", ".join(balancer.get("SecurityGroups", []))
                  collected.append({
                      "account_id": account_id,
                      "region": region,
                      "service": "elb",
                      "resource_type": "Load Balancer",
                      "resource_id": balancer_name,
                      "name": balancer_name,
                      "attributes": {
                          "Name": balancer_name,
                          "Type": "CLASSIC",
                          "DNS": balancer.get("DNSName", ""),
                          "Scheme": balancer.get("Scheme", ""),
                          "VPC": balancer.get("VPCId", ""),
                          "Availability Zones": zone_summary,
                          "Subnet": subnet_summary,
                          "Security Groups": group_summary,
                      },
                  })
      except Exception as exc:
          logger.warning(f"Failed to scan Classic ELB: {str(exc)}")

      return collected
  def _scan_target_groups(
      self, account_id: str, region: str
  ) -> List[Dict[str, Any]]:
      """
      Scan Target Groups in the specified region.

      For every target group one extra ``describe_target_health`` call
      lists its registered target IDs; when that call fails a warning is
      logged and the group is still reported. A failure of the listing
      itself is logged and ends the scan with whatever was collected.

      Args:
          account_id: AWS account ID
          region: Region to scan

      Returns:
          List of Target Group resource dictionaries
          Attributes: Load Balancer, TG Name, Port, Protocol, Registered Instances,
          Health Check Path
      """
      client = self._session.client("elbv2", region_name=region)
      collected = []
      try:
          for page in client.get_paginator("describe_target_groups").paginate():
              for group in page.get("TargetGroups", []):
                  group_name = group.get("TargetGroupName", "")
                  group_arn = group.get("TargetGroupArn", "")

                  # Balancer name is the second-to-last "/" segment of its
                  # ARN; an ARN without "/" is kept unchanged.
                  balancer_names = [
                      arn.split("/")[-2] if "/" in arn else arn
                      for arn in group.get("LoadBalancerArns", [])
                  ]

                  member_ids = []
                  try:
                      health = client.describe_target_health(
                          TargetGroupArn=group_arn
                      )
                      for description in health.get("TargetHealthDescriptions", []):
                          member_id = description.get("Target", {}).get("Id", "")
                          if member_id:
                              member_ids.append(member_id)
                  except Exception as exc:
                      logger.warning(f"Failed to get target health: {str(exc)}")

                  if balancer_names:
                      balancer_summary = ", ".join(balancer_names)
                  else:
                      balancer_summary = "N/A"
                  if member_ids:
                      member_summary = ", ".join(member_ids)
                  else:
                      member_summary = "None"

                  collected.append({
                      "account_id": account_id,
                      "region": region,
                      "service": "target_group",
                      "resource_type": "Target Group",
                      "resource_id": group_arn,
                      "name": group_name,
                      "attributes": {
                          "Load Balancer": balancer_summary,
                          "TG Name": group_name,
                          "Port": str(group.get("Port", "")),
                          "Protocol": group.get("Protocol", ""),
                          "Registered Instances": member_summary,
                          "Health Check Path": group.get("HealthCheckPath", "N/A"),
                      },
                  })
      except Exception as exc:
          logger.warning(f"Failed to scan target groups: {str(exc)}")
      return collected
  def _scan_lambda_functions(
      self, account_id: str, region: str
  ) -> List[Dict[str, Any]]:
      """
      Scan Lambda Functions in the specified region.

      Walks every page of ``list_functions``. Any error is logged as a
      warning and the functions collected up to that point are returned.

      Args:
          account_id: AWS account ID
          region: Region to scan

      Returns:
          List of Lambda Function resource dictionaries
          Attributes: Function Name, Runtime, Memory (MB), Timeout (s), Last Modified
      """
      client = self._session.client("lambda", region_name=region)
      collected = []
      try:
          for page in client.get_paginator("list_functions").paginate():
              for entry in page.get("Functions", []):
                  function_name = entry.get("FunctionName", "")
                  details = {
                      "Function Name": function_name,
                      # Runtime is reported as "N/A" when the entry has none.
                      "Runtime": entry.get("Runtime", "N/A"),
                      "Memory (MB)": str(entry.get("MemorySize", "")),
                      "Timeout (s)": str(entry.get("Timeout", "")),
                      "Last Modified": entry.get("LastModified", ""),
                  }
                  collected.append({
                      "account_id": account_id,
                      "region": region,
                      "service": "lambda",
                      "resource_type": "Function",
                      "resource_id": entry.get("FunctionArn", function_name),
                      "name": function_name,
                      "attributes": details,
                  })
      except Exception as exc:
          logger.warning(f"Failed to scan Lambda functions: {str(exc)}")
      return collected
  def _scan_eks_clusters(
      self, account_id: str, region: str
  ) -> List[Dict[str, Any]]:
      """
      Scan EKS Clusters in the specified region.

      All cluster names are gathered from ``list_clusters`` first, then
      each one is described individually. A cluster that cannot be
      described is logged and skipped; a failure while listing is logged
      and yields no clusters.

      Args:
          account_id: AWS account ID
          region: Region to scan

      Returns:
          List of EKS Cluster resource dictionaries
          Attributes: Cluster Name, Version, Status, Endpoint, VPC ID
      """
      client = self._session.client("eks", region_name=region)
      collected = []
      try:
          names = []
          for page in client.get_paginator("list_clusters").paginate():
              names += page.get("clusters", [])

          for cluster_name in names:
              try:
                  info = client.describe_cluster(name=cluster_name).get("cluster", {})
                  vpc_config = info.get("resourcesVpcConfig", {})
                  collected.append({
                      "account_id": account_id,
                      "region": region,
                      "service": "eks",
                      "resource_type": "Cluster",
                      "resource_id": info.get("arn", cluster_name),
                      "name": cluster_name,
                      "attributes": {
                          "Cluster Name": cluster_name,
                          "Version": info.get("version", ""),
                          "Status": info.get("status", ""),
                          "Endpoint": info.get("endpoint", ""),
                          "VPC ID": vpc_config.get("vpcId", ""),
                      },
                  })
              except Exception as exc:
                  logger.warning(
                      f"Failed to describe EKS cluster {cluster_name}: {str(exc)}"
                  )
      except Exception as exc:
          logger.warning(f"Failed to list EKS clusters: {str(exc)}")
      return collected
  1875. # =========================================================================
  1876. # Database and Storage Service Scanners (Task 1.4)
  1877. # =========================================================================
  def _scan_rds_instances(
      self, account_id: str, region: str
  ) -> List[Dict[str, Any]]:
      """
      Scan RDS DB Instances in the specified region.

      Walks every page of ``describe_db_instances``. Boolean settings are
      rendered as ``Yes``/``No``. Any error is logged as a warning and the
      instances collected up to that point are returned.

      Args:
          account_id: AWS account ID
          region: Region to scan

      Returns:
          List of RDS DB Instance resource dictionaries
          Attributes (vertical layout - one table per instance):
          Region, Endpoint, DB instance ID, DB name, Master Username, Port,
          DB Engine, DB Version, Instance Type, Storage type, Storage, Multi-AZ,
          Security Group, Deletion Protection, Performance Insights Enabled, CloudWatch Logs
      """

      def yes_no(flag):
          # Render a truthy/falsy API value the way the report expects.
          return "Yes" if flag else "No"

      client = self._session.client("rds", region_name=region)
      collected = []
      try:
          for page in client.get_paginator("describe_db_instances").paginate():
              for instance in page.get("DBInstances", []):
                  identifier = instance.get("DBInstanceIdentifier", "")
                  group_ids = [
                      group.get("VpcSecurityGroupId", "")
                      for group in instance.get("VpcSecurityGroups", [])
                  ]
                  log_exports = instance.get("EnabledCloudwatchLogsExports", [])
                  endpoint = instance.get("Endpoint", {})
                  allocated = instance.get("AllocatedStorage", "")

                  collected.append({
                      "account_id": account_id,
                      "region": region,
                      "service": "rds",
                      "resource_type": "DB Instance",
                      "resource_id": instance.get("DBInstanceArn", identifier),
                      "name": identifier,
                      "attributes": {
                          "Region": region,
                          "Endpoint": endpoint.get("Address", ""),
                          "DB instance ID": identifier,
                          "DB name": instance.get("DBName", ""),
                          "Master Username": instance.get("MasterUsername", ""),
                          "Port": str(endpoint.get("Port", "")),
                          "DB Engine": instance.get("Engine", ""),
                          "DB Version": instance.get("EngineVersion", ""),
                          "Instance Type": instance.get("DBInstanceClass", ""),
                          "Storage type": instance.get("StorageType", ""),
                          "Storage": f"{allocated} GB",
                          "Multi-AZ": yes_no(instance.get("MultiAZ")),
                          "Security Group": ", ".join(group_ids),
                          "Deletion Protection": yes_no(
                              instance.get("DeletionProtection")
                          ),
                          "Performance Insights Enabled": yes_no(
                              instance.get("PerformanceInsightsEnabled")
                          ),
                          "CloudWatch Logs": (
                              ", ".join(log_exports) if log_exports else "N/A"
                          ),
                      },
                  })
      except Exception as exc:
          logger.warning(f"Failed to scan RDS instances: {str(exc)}")
      return collected
  def _scan_elasticache_clusters(
      self, account_id: str, region: str
  ) -> List[Dict[str, Any]]:
      """
      Scan ElastiCache Clusters in the specified region.

      Two passes are made: individual cache clusters first, then
      replication groups. Both are reported with resource type
      ``Cache Cluster``. For a replication group the first member cluster
      is described to obtain the engine version, and to fill in node type
      and engine when the group record does not carry them. Engine falls
      back to ``redis`` when neither source names one. Every failure is
      logged as a warning and never aborts the scan.

      Args:
          account_id: AWS account ID
          region: Region to scan

      Returns:
          List of ElastiCache Cluster resource dictionaries
          Attributes (vertical layout - one table per cluster):
          Cluster ID, Engine, Engine Version, Node Type, Num Nodes, Status
      """
      resources = []
      elasticache_client = self._session.client("elasticache", region_name=region)

      # Scan cache clusters (Redis/Memcached)
      try:
          paginator = elasticache_client.get_paginator("describe_cache_clusters")
          for page in paginator.paginate(ShowCacheNodeInfo=True):
              for cluster in page.get("CacheClusters", []):
                  cluster_id = cluster.get("CacheClusterId", "")
                  resources.append({
                      "account_id": account_id,
                      "region": region,
                      "service": "elasticache",
                      "resource_type": "Cache Cluster",
                      "resource_id": cluster.get("ARN", cluster_id),
                      "name": cluster_id,
                      "attributes": {
                          "Cluster ID": cluster_id,
                          "Engine": cluster.get("Engine", ""),
                          "Engine Version": cluster.get("EngineVersion", ""),
                          "Node Type": cluster.get("CacheNodeType", ""),
                          "Num Nodes": str(cluster.get("NumCacheNodes", 0)),
                          "Status": cluster.get("CacheClusterStatus", ""),
                      },
                  })
      except Exception as e:
          logger.warning(f"Failed to scan ElastiCache clusters: {str(e)}")

      # Also scan replication groups
      try:
          paginator = elasticache_client.get_paginator("describe_replication_groups")
          for page in paginator.paginate():
              for rg in page.get("ReplicationGroups", []):
                  rg_id = rg.get("ReplicationGroupId", "")

                  # Count members across all node groups (shards)
                  num_nodes = sum(
                      len(node_group.get("NodeGroupMembers", []))
                      for node_group in rg.get("NodeGroups", [])
                  )

                  # Prefer what the group record itself reports.
                  node_type = rg.get("CacheNodeType", "")
                  engine = rg.get("Engine", "")
                  engine_version = ""

                  member_clusters = rg.get("MemberClusters", [])
                  if member_clusters:
                      member_id = member_clusters[0]
                      try:
                          cluster_response = elasticache_client.describe_cache_clusters(
                              CacheClusterId=member_id
                          )
                          members = cluster_response.get("CacheClusters")
                          if members:
                              member = members[0]
                              node_type = node_type or member.get("CacheNodeType", "")
                              engine = engine or member.get("Engine", "")
                              engine_version = member.get("EngineVersion", "")
                      except Exception as e:
                          # Best effort: report the group with blanks.
                          logger.warning(
                              f"Failed to describe member cluster {member_id} "
                              f"of replication group {rg_id}: {str(e)}"
                          )

                  resources.append({
                      "account_id": account_id,
                      "region": region,
                      "service": "elasticache",
                      "resource_type": "Cache Cluster",
                      "resource_id": rg.get("ARN", rg_id),
                      "name": rg_id,
                      "attributes": {
                          "Cluster ID": rg_id,
                          "Engine": engine or "redis",
                          "Engine Version": engine_version,
                          "Node Type": node_type,
                          "Num Nodes": str(num_nodes),
                          "Status": rg.get("Status", ""),
                      },
                  })
      except Exception as e:
          logger.warning(f"Failed to scan ElastiCache replication groups: {str(e)}")

      return resources
  def _scan_s3_buckets(
      self, account_id: str, region: str
  ) -> List[Dict[str, Any]]:
      """
      Scan S3 Buckets (global service, scanned once from us-east-1).

      Each bucket's home region is looked up with ``get_bucket_location``.
      An empty location constraint means ``us-east-1`` and the legacy
      constraint ``EU`` means ``eu-west-1``. When the lookup fails the
      region is reported as ``unknown`` and the reason is logged at debug
      level. The resource's own ``region`` field is always ``global``.

      Args:
          account_id: AWS account ID
          region: Region to scan (should be us-east-1 for global service)

      Returns:
          List of S3 Bucket resource dictionaries
          Attributes (horizontal layout): Region, Bucket Name
      """
      # Legacy LocationConstraint values mapped to real region names.
      legacy_locations = {"EU": "eu-west-1"}

      resources = []
      s3_client = self._session.client("s3", region_name=region)
      try:
          response = s3_client.list_buckets()
          for bucket in response.get("Buckets", []):
              bucket_name = bucket.get("Name", "")

              # Get bucket location
              try:
                  location_response = s3_client.get_bucket_location(
                      Bucket=bucket_name
                  )
                  constraint = location_response.get("LocationConstraint")
                  bucket_region = (
                      legacy_locations.get(constraint, constraint) or "us-east-1"
                  )
              except Exception as e:
                  # Best effort: still list a bucket whose location
                  # cannot be read.
                  logger.debug(
                      f"Failed to get location for bucket {bucket_name}: "
                      f"{str(e)}"
                  )
                  bucket_region = "unknown"

              resources.append({
                  "account_id": account_id,
                  "region": "global",
                  "service": "s3",
                  "resource_type": "Bucket",
                  "resource_id": bucket_name,
                  "name": bucket_name,
                  "attributes": {
                      "Region": bucket_region,
                      "Bucket Name": bucket_name,
                  },
              })
      except Exception as e:
          logger.warning(f"Failed to scan S3 buckets: {str(e)}")
      return resources
  def _scan_s3_event_notifications(
      self, account_id: str, region: str
  ) -> List[Dict[str, Any]]:
      """
      Scan S3 Event Notifications (global service, scanned once from us-east-1).

      Every bucket's notification configuration is read and one resource
      is emitted per Lambda, SQS and SNS configuration, in that order. The
      destination is the resource part of the target ARN; for Lambda a
      version or alias qualifier is kept together with the function name
      (``name:alias``). Buckets whose configuration cannot be read are
      skipped with a debug log.

      Args:
          account_id: AWS account ID
          region: Region to scan (should be us-east-1 for global service)

      Returns:
          List of S3 Event Notification resource dictionaries
          Attributes (vertical layout):
          Bucket, Name, Event Type, Destination type, Destination
      """
      # (response key, ARN field, destination label / default Id,
      #  max number of ":" splits applied to the ARN)
      # A Lambda ARN is "arn:aws:lambda:region:account:function:name[:qualifier]",
      # so splitting at most 6 times keeps "name:qualifier" together.
      destination_kinds = (
          ("LambdaFunctionConfigurations", "LambdaFunctionArn", "Lambda", 6),
          ("QueueConfigurations", "QueueArn", "SQS", -1),
          ("TopicConfigurations", "TopicArn", "SNS", -1),
      )

      resources = []
      s3_client = self._session.client("s3", region_name=region)
      try:
          # First get all buckets
          buckets_response = s3_client.list_buckets()
          for bucket in buckets_response.get("Buckets", []):
              bucket_name = bucket.get("Name", "")
              try:
                  notif_response = s3_client.get_bucket_notification_configuration(
                      Bucket=bucket_name
                  )
                  for response_key, arn_field, label, max_split in destination_kinds:
                      for config in notif_response.get(response_key, []):
                          config_id = config.get("Id", label)
                          target_arn = config.get(arn_field, "")
                          resources.append({
                              "account_id": account_id,
                              "region": "global",
                              "service": "s3_event_notification",
                              "resource_type": "S3 event notification",
                              "resource_id": f"{bucket_name}/{config_id}",
                              "name": config_id,
                              "attributes": {
                                  "Bucket": bucket_name,
                                  "Name": config_id,
                                  "Event Type": ", ".join(config.get("Events", [])),
                                  "Destination type": label,
                                  "Destination": (
                                      target_arn.split(":", max_split)[-1]
                                      if target_arn
                                      else ""
                                  ),
                              },
                          })
              except Exception as e:
                  # Skip buckets we can't access
                  logger.debug(
                      f"Failed to get notifications for bucket {bucket_name}: "
                      f"{str(e)}"
                  )
      except Exception as e:
          logger.warning(f"Failed to scan S3 event notifications: {str(e)}")
      return resources
  2172. # =========================================================================
  2173. # Global and Monitoring Service Scanners (Task 1.5)
  2174. # =========================================================================
  def _scan_cloudfront_distributions(
      self, account_id: str, region: str
  ) -> List[Dict[str, Any]]:
      """
      Scan CloudFront Distributions (global service).

      The client is always created for ``us-east-1``; the ``region``
      argument is not used. Origin details are taken from the first origin
      only. An origin without a custom origin configuration is reported
      with protocol policy ``S3``. Any error is logged as a warning and
      the distributions collected up to that point are returned.

      Args:
          account_id: AWS account ID
          region: Region to scan (should be us-east-1 for global service)

      Returns:
          List of CloudFront Distribution resource dictionaries
          Attributes (vertical layout - one table per distribution):
          CloudFront ID, Domain Name, CNAME, Origin Domain Name,
          Origin Protocol Policy, Viewer Protocol Policy,
          Allowed HTTP Methods, Cached HTTP Methods
      """
      client = self._session.client("cloudfront", region_name="us-east-1")
      collected = []
      try:
          for page in client.get_paginator("list_distributions").paginate():
              for item in page.get("DistributionList", {}).get("Items", []):
                  distribution_id = item.get("Id", "")
                  cnames = item.get("Aliases", {}).get("Items", [])

                  # Origin details: first origin only
                  origin_host = ""
                  origin_policy = ""
                  origin_items = item.get("Origins", {}).get("Items", [])
                  if origin_items:
                      primary = origin_items[0]
                      origin_host = primary.get("DomainName", "")
                      custom_config = primary.get("CustomOriginConfig", {})
                      origin_policy = (
                          custom_config.get("OriginProtocolPolicy", "")
                          if custom_config
                          else "S3"
                      )

                  # Default cache behavior; the cached methods are read from
                  # inside the AllowedMethods structure.
                  behavior = item.get("DefaultCacheBehavior", {})
                  method_config = behavior.get("AllowedMethods", {})
                  allowed = method_config.get("Items", [])
                  cached = method_config.get("CachedMethods", {}).get("Items", [])

                  collected.append({
                      "account_id": account_id,
                      "region": "global",
                      "service": "cloudfront",
                      "resource_type": "Distribution",
                      "resource_id": item.get("ARN", distribution_id),
                      "name": distribution_id,
                      "attributes": {
                          "CloudFront ID": distribution_id,
                          "Domain Name": item.get("DomainName", ""),
                          "CNAME": ", ".join(cnames) if cnames else "N/A",
                          "Origin Domain Name": origin_host,
                          "Origin Protocol Policy": origin_policy,
                          "Viewer Protocol Policy": behavior.get(
                              "ViewerProtocolPolicy", ""
                          ),
                          "Allowed HTTP Methods": ", ".join(allowed),
                          "Cached HTTP Methods": ", ".join(cached),
                      },
                  })
      except Exception as exc:
          logger.warning(f"Failed to scan CloudFront distributions: {str(exc)}")
      return collected
  2247. def _scan_route53_hosted_zones(
  2248. self, account_id: str, region: str
  2249. ) -> List[Dict[str, Any]]:
  2250. """
  2251. Scan Route 53 Hosted Zones (global service).
  2252. Args:
  2253. account_id: AWS account ID
  2254. region: Region to scan (should be us-east-1 for global service)
  2255. Returns:
  2256. List of Route 53 Hosted Zone resource dictionaries
  2257. Attributes (horizontal layout):
  2258. Zone ID, Name, Type, Record Count
  2259. """
  2260. resources = []
  2261. # Route 53 is a global service
  2262. route53_client = self._session.client("route53", region_name="us-east-1")
  2263. try:
  2264. paginator = route53_client.get_paginator("list_hosted_zones")
  2265. for page in paginator.paginate():
  2266. for zone in page.get("HostedZones", []):
  2267. zone_id = zone.get("Id", "").replace("/hostedzone/", "")
  2268. zone_name = zone.get("Name", "")
  2269. # Determine zone type
  2270. zone_type = (
  2271. "Private"
  2272. if zone.get("Config", {}).get("PrivateZone")
  2273. else "Public"
  2274. )
  2275. resources.append({
  2276. "account_id": account_id,
  2277. "region": "global",
  2278. "service": "route53",
  2279. "resource_type": "Hosted Zone",
  2280. "resource_id": zone_id,
  2281. "name": zone_name,
  2282. "attributes": {
  2283. "Zone ID": zone_id,
  2284. "Name": zone_name,
  2285. "Type": zone_type,
  2286. "Record Count": str(
  2287. zone.get("ResourceRecordSetCount", 0)
  2288. ),
  2289. },
  2290. })
  2291. except Exception as e:
  2292. logger.warning(f"Failed to scan Route 53 hosted zones: {str(e)}")
  2293. return resources
  2294. def _scan_acm_certificates(
  2295. self, account_id: str, region: str
  2296. ) -> List[Dict[str, Any]]:
  2297. """
  2298. Scan ACM Certificates (regional service).
  2299. Args:
  2300. account_id: AWS account ID
  2301. region: Region to scan
  2302. Returns:
  2303. List of ACM Certificate resource dictionaries
  2304. Attributes (horizontal layout): Domain name, Additional names
  2305. """
  2306. resources = []
  2307. # ACM is a regional service
  2308. acm_client = self._session.client("acm", region_name=region)
  2309. try:
  2310. paginator = acm_client.get_paginator("list_certificates")
  2311. for page in paginator.paginate():
  2312. for cert in page.get("CertificateSummaryList", []):
  2313. domain_name = cert.get("DomainName", "")
  2314. cert_arn = cert.get("CertificateArn", "")
  2315. # Get additional names (Subject Alternative Names)
  2316. additional_names = ""
  2317. try:
  2318. cert_detail = acm_client.describe_certificate(
  2319. CertificateArn=cert_arn
  2320. )
  2321. sans = cert_detail.get("Certificate", {}).get(
  2322. "SubjectAlternativeNames", []
  2323. )
  2324. # Filter out the main domain name from SANs
  2325. additional = [san for san in sans if san != domain_name]
  2326. additional_names = ", ".join(additional) if additional else ""
  2327. except Exception:
  2328. pass
  2329. resources.append({
  2330. "account_id": account_id,
  2331. "region": region,
  2332. "service": "acm",
  2333. "resource_type": "Certificate",
  2334. "resource_id": cert_arn,
  2335. "name": domain_name,
  2336. "attributes": {
  2337. "Domain name": domain_name,
  2338. "Additional names": additional_names,
  2339. },
  2340. })
  2341. except Exception as e:
  2342. logger.warning(f"Failed to scan ACM certificates in {region}: {str(e)}")
  2343. return resources
  2344. def _scan_waf_web_acls(
  2345. self, account_id: str, region: str
  2346. ) -> List[Dict[str, Any]]:
  2347. """
  2348. Scan WAF Web ACLs (global service for CloudFront).
  2349. Args:
  2350. account_id: AWS account ID
  2351. region: Region to scan (should be us-east-1 for global service)
  2352. Returns:
  2353. List of WAF Web ACL resource dictionaries
  2354. Attributes (horizontal layout):
  2355. WebACL Name, Scope, Rules Count, Associated Resources
  2356. """
  2357. resources = []
  2358. # Scan WAFv2 global (CloudFront) Web ACLs
  2359. wafv2_client = self._session.client("wafv2", region_name="us-east-1")
  2360. try:
  2361. # List CloudFront Web ACLs (CLOUDFRONT scope)
  2362. response = wafv2_client.list_web_acls(Scope="CLOUDFRONT")
  2363. for acl in response.get("WebACLs", []):
  2364. acl_name = acl.get("Name", "")
  2365. acl_id = acl.get("Id", "")
  2366. acl_arn = acl.get("ARN", "")
  2367. # Get Web ACL details for rules count
  2368. rules_count = 0
  2369. associated_resources = []
  2370. try:
  2371. acl_response = wafv2_client.get_web_acl(
  2372. Name=acl_name,
  2373. Scope="CLOUDFRONT",
  2374. Id=acl_id,
  2375. )
  2376. web_acl = acl_response.get("WebACL", {})
  2377. rules_count = len(web_acl.get("Rules", []))
  2378. # Get associated resources
  2379. resources_response = wafv2_client.list_resources_for_web_acl(
  2380. WebACLArn=acl_arn
  2381. )
  2382. for resource_arn in resources_response.get("ResourceArns", []):
  2383. # Extract resource name from ARN
  2384. resource_name = resource_arn.split("/")[-1]
  2385. associated_resources.append(resource_name)
  2386. except Exception as e:
  2387. logger.debug(f"Failed to get WAF ACL details: {str(e)}")
  2388. resources.append({
  2389. "account_id": account_id,
  2390. "region": "global",
  2391. "service": "waf",
  2392. "resource_type": "Web ACL",
  2393. "resource_id": acl_arn,
  2394. "name": acl_name,
  2395. "attributes": {
  2396. "WebACL Name": acl_name,
  2397. "Scope": "CLOUDFRONT",
  2398. "Rules Count": str(rules_count),
  2399. "Associated Resources": (
  2400. ", ".join(associated_resources)
  2401. if associated_resources
  2402. else "None"
  2403. ),
  2404. },
  2405. })
  2406. except Exception as e:
  2407. logger.warning(f"Failed to scan WAFv2 Web ACLs: {str(e)}")
  2408. # Also scan regional WAF Web ACLs
  2409. try:
  2410. response = wafv2_client.list_web_acls(Scope="REGIONAL")
  2411. for acl in response.get("WebACLs", []):
  2412. acl_name = acl.get("Name", "")
  2413. acl_id = acl.get("Id", "")
  2414. acl_arn = acl.get("ARN", "")
  2415. rules_count = 0
  2416. associated_resources = []
  2417. try:
  2418. acl_response = wafv2_client.get_web_acl(
  2419. Name=acl_name,
  2420. Scope="REGIONAL",
  2421. Id=acl_id,
  2422. )
  2423. web_acl = acl_response.get("WebACL", {})
  2424. rules_count = len(web_acl.get("Rules", []))
  2425. resources_response = wafv2_client.list_resources_for_web_acl(
  2426. WebACLArn=acl_arn
  2427. )
  2428. for resource_arn in resources_response.get("ResourceArns", []):
  2429. resource_name = resource_arn.split("/")[-1]
  2430. associated_resources.append(resource_name)
  2431. except Exception as e:
  2432. logger.debug(f"Failed to get WAF ACL details: {str(e)}")
  2433. resources.append({
  2434. "account_id": account_id,
  2435. "region": "global",
  2436. "service": "waf",
  2437. "resource_type": "Web ACL",
  2438. "resource_id": acl_arn,
  2439. "name": acl_name,
  2440. "attributes": {
  2441. "WebACL Name": acl_name,
  2442. "Scope": "REGIONAL",
  2443. "Rules Count": str(rules_count),
  2444. "Associated Resources": (
  2445. ", ".join(associated_resources)
  2446. if associated_resources
  2447. else "None"
  2448. ),
  2449. },
  2450. })
  2451. except Exception as e:
  2452. logger.warning(f"Failed to scan regional WAFv2 Web ACLs: {str(e)}")
  2453. return resources
  2454. def _scan_sns_topics(
  2455. self, account_id: str, region: str
  2456. ) -> List[Dict[str, Any]]:
  2457. """
  2458. Scan SNS Topics in the specified region.
  2459. Args:
  2460. account_id: AWS account ID
  2461. region: Region to scan
  2462. Returns:
  2463. List of SNS Topic resource dictionaries
  2464. Attributes (horizontal layout):
  2465. Topic Name, Topic Display Name, Subscription Protocol, Subscription Endpoint
  2466. """
  2467. resources = []
  2468. sns_client = self._session.client("sns", region_name=region)
  2469. try:
  2470. paginator = sns_client.get_paginator("list_topics")
  2471. for page in paginator.paginate():
  2472. for topic in page.get("Topics", []):
  2473. topic_arn = topic.get("TopicArn", "")
  2474. topic_name = topic_arn.split(":")[-1] if topic_arn else ""
  2475. # Get topic attributes
  2476. display_name = ""
  2477. try:
  2478. attrs_response = sns_client.get_topic_attributes(
  2479. TopicArn=topic_arn
  2480. )
  2481. attrs = attrs_response.get("Attributes", {})
  2482. display_name = attrs.get("DisplayName", "")
  2483. except Exception as e:
  2484. logger.debug(f"Failed to get topic attributes: {str(e)}")
  2485. # Get subscriptions
  2486. subscriptions = []
  2487. try:
  2488. sub_paginator = sns_client.get_paginator(
  2489. "list_subscriptions_by_topic"
  2490. )
  2491. for sub_page in sub_paginator.paginate(TopicArn=topic_arn):
  2492. for sub in sub_page.get("Subscriptions", []):
  2493. protocol = sub.get("Protocol", "")
  2494. endpoint = sub.get("Endpoint", "")
  2495. subscriptions.append({
  2496. "protocol": protocol,
  2497. "endpoint": endpoint,
  2498. })
  2499. except Exception as e:
  2500. logger.debug(f"Failed to get subscriptions: {str(e)}")
  2501. # Create one entry per subscription, or one entry if no subscriptions
  2502. if subscriptions:
  2503. for sub in subscriptions:
  2504. resources.append({
  2505. "account_id": account_id,
  2506. "region": region,
  2507. "service": "sns",
  2508. "resource_type": "Topic",
  2509. "resource_id": topic_arn,
  2510. "name": topic_name,
  2511. "attributes": {
  2512. "Topic Name": topic_name,
  2513. "Topic Display Name": display_name,
  2514. "Subscription Protocol": sub["protocol"],
  2515. "Subscription Endpoint": sub["endpoint"],
  2516. },
  2517. })
  2518. else:
  2519. resources.append({
  2520. "account_id": account_id,
  2521. "region": region,
  2522. "service": "sns",
  2523. "resource_type": "Topic",
  2524. "resource_id": topic_arn,
  2525. "name": topic_name,
  2526. "attributes": {
  2527. "Topic Name": topic_name,
  2528. "Topic Display Name": display_name,
  2529. "Subscription Protocol": "N/A",
  2530. "Subscription Endpoint": "N/A",
  2531. },
  2532. })
  2533. except Exception as e:
  2534. logger.warning(f"Failed to scan SNS topics: {str(e)}")
  2535. return resources
  2536. def _scan_cloudwatch_log_groups(
  2537. self, account_id: str, region: str
  2538. ) -> List[Dict[str, Any]]:
  2539. """
  2540. Scan CloudWatch Log Groups in the specified region.
  2541. Args:
  2542. account_id: AWS account ID
  2543. region: Region to scan
  2544. Returns:
  2545. List of CloudWatch Log Group resource dictionaries
  2546. Attributes (horizontal layout):
  2547. Log Group Name, Retention Days, Stored Bytes, KMS Encryption
  2548. """
  2549. resources = []
  2550. logs_client = self._session.client("logs", region_name=region)
  2551. try:
  2552. paginator = logs_client.get_paginator("describe_log_groups")
  2553. for page in paginator.paginate():
  2554. for log_group in page.get("logGroups", []):
  2555. log_group_name = log_group.get("logGroupName", "")
  2556. # Get retention in days
  2557. retention = log_group.get("retentionInDays")
  2558. retention_str = str(retention) if retention else "Never Expire"
  2559. # Get stored bytes
  2560. stored_bytes = log_group.get("storedBytes", 0)
  2561. stored_str = (
  2562. f"{stored_bytes / (1024*1024):.2f} MB"
  2563. if stored_bytes
  2564. else "0 MB"
  2565. )
  2566. # Check KMS encryption
  2567. kms_key = log_group.get("kmsKeyId", "")
  2568. kms_encrypted = "Yes" if kms_key else "No"
  2569. resources.append({
  2570. "account_id": account_id,
  2571. "region": region,
  2572. "service": "cloudwatch",
  2573. "resource_type": "Log Group",
  2574. "resource_id": log_group.get("arn", log_group_name),
  2575. "name": log_group_name,
  2576. "attributes": {
  2577. "Log Group Name": log_group_name,
  2578. "Retention Days": retention_str,
  2579. "Stored Bytes": stored_str,
  2580. "KMS Encryption": kms_encrypted,
  2581. },
  2582. })
  2583. except Exception as e:
  2584. logger.warning(f"Failed to scan CloudWatch log groups: {str(e)}")
  2585. return resources
  2586. def _scan_eventbridge_rules(
  2587. self, account_id: str, region: str
  2588. ) -> List[Dict[str, Any]]:
  2589. """
  2590. Scan EventBridge Rules in the specified region.
  2591. Args:
  2592. account_id: AWS account ID
  2593. region: Region to scan
  2594. Returns:
  2595. List of EventBridge Rule resource dictionaries
  2596. Attributes (horizontal layout):
  2597. Name, Description, Event Bus, State
  2598. """
  2599. resources = []
  2600. events_client = self._session.client("events", region_name=region)
  2601. try:
  2602. # List event buses first
  2603. buses_response = events_client.list_event_buses()
  2604. event_buses = [
  2605. bus.get("Name", "default")
  2606. for bus in buses_response.get("EventBuses", [])
  2607. ]
  2608. # If no buses found, use default
  2609. if not event_buses:
  2610. event_buses = ["default"]
  2611. for bus_name in event_buses:
  2612. try:
  2613. paginator = events_client.get_paginator("list_rules")
  2614. for page in paginator.paginate(EventBusName=bus_name):
  2615. for rule in page.get("Rules", []):
  2616. rule_name = rule.get("Name", "")
  2617. resources.append({
  2618. "account_id": account_id,
  2619. "region": region,
  2620. "service": "eventbridge",
  2621. "resource_type": "Rule",
  2622. "resource_id": rule.get("Arn", rule_name),
  2623. "name": rule_name,
  2624. "attributes": {
  2625. "Name": rule_name,
  2626. "Description": rule.get("Description", ""),
  2627. "Event Bus": bus_name,
  2628. "State": rule.get("State", ""),
  2629. },
  2630. })
  2631. except Exception as e:
  2632. logger.debug(
  2633. f"Failed to list rules for bus {bus_name}: {str(e)}"
  2634. )
  2635. except Exception as e:
  2636. logger.warning(f"Failed to scan EventBridge rules: {str(e)}")
  2637. return resources
  2638. def _scan_cloudtrail_trails(
  2639. self, account_id: str, region: str
  2640. ) -> List[Dict[str, Any]]:
  2641. """
  2642. Scan CloudTrail Trails (global service).
  2643. Args:
  2644. account_id: AWS account ID
  2645. region: Region to scan (should be us-east-1 for global service)
  2646. Returns:
  2647. List of CloudTrail Trail resource dictionaries
  2648. Attributes (horizontal layout):
  2649. Name, Multi-Region Trail, Log File Validation, KMS Encryption
  2650. """
  2651. resources = []
  2652. cloudtrail_client = self._session.client(
  2653. "cloudtrail", region_name="us-east-1"
  2654. )
  2655. try:
  2656. response = cloudtrail_client.describe_trails()
  2657. for trail in response.get("trailList", []):
  2658. trail_name = trail.get("Name", "")
  2659. # Get multi-region status
  2660. is_multi_region = trail.get("IsMultiRegionTrail", False)
  2661. resources.append({
  2662. "account_id": account_id,
  2663. "region": "global",
  2664. "service": "cloudtrail",
  2665. "resource_type": "Trail",
  2666. "resource_id": trail.get("TrailARN", trail_name),
  2667. "name": trail_name,
  2668. "attributes": {
  2669. "Name": trail_name,
  2670. "Multi-Region Trail": "Yes" if is_multi_region else "No",
  2671. "Log File Validation": (
  2672. "Yes" if trail.get("LogFileValidationEnabled") else "No"
  2673. ),
  2674. "KMS Encryption": (
  2675. "Yes" if trail.get("KmsKeyId") else "No"
  2676. ),
  2677. },
  2678. })
  2679. except Exception as e:
  2680. logger.warning(f"Failed to scan CloudTrail trails: {str(e)}")
  2681. return resources
  2682. def _scan_config_recorders(
  2683. self, account_id: str, region: str
  2684. ) -> List[Dict[str, Any]]:
  2685. """
  2686. Scan AWS Config Recorders in the specified region.
  2687. Args:
  2688. account_id: AWS account ID
  2689. region: Region to scan
  2690. Returns:
  2691. List of AWS Config Recorder resource dictionaries
  2692. Attributes (horizontal layout):
  2693. Name, Regional Resources, Global Resources, Retention period
  2694. """
  2695. resources = []
  2696. config_client = self._session.client("config", region_name=region)
  2697. try:
  2698. response = config_client.describe_configuration_recorders()
  2699. for recorder in response.get("ConfigurationRecorders", []):
  2700. recorder_name = recorder.get("name", "")
  2701. # Get recording group settings
  2702. recording_group = recorder.get("recordingGroup", {})
  2703. all_supported = recording_group.get("allSupported", False)
  2704. include_global = recording_group.get(
  2705. "includeGlobalResourceTypes", False
  2706. )
  2707. # Get retention period
  2708. retention_period = "N/A"
  2709. try:
  2710. retention_response = (
  2711. config_client.describe_retention_configurations()
  2712. )
  2713. for retention in retention_response.get(
  2714. "RetentionConfigurations", []
  2715. ):
  2716. retention_period = (
  2717. f"{retention.get('RetentionPeriodInDays', 'N/A')} days"
  2718. )
  2719. break
  2720. except Exception:
  2721. pass
  2722. resources.append({
  2723. "account_id": account_id,
  2724. "region": region,
  2725. "service": "config",
  2726. "resource_type": "Config",
  2727. "resource_id": recorder_name,
  2728. "name": recorder_name,
  2729. "attributes": {
  2730. "Name": recorder_name,
  2731. "Regional Resources": "Yes" if all_supported else "No",
  2732. "Global Resources": "Yes" if include_global else "No",
  2733. "Retention period": retention_period,
  2734. },
  2735. })
  2736. except Exception as e:
  2737. logger.warning(f"Failed to scan Config recorders: {str(e)}")
  2738. return resources
  2739. def parse_arguments() -> argparse.Namespace:
  2740. """
  2741. Parse command-line arguments.
  2742. Returns:
  2743. Parsed arguments namespace
  2744. """
  2745. parser = argparse.ArgumentParser(
  2746. description="CloudShell Scanner - AWS Resource Scanner for CloudShell Environment",
  2747. formatter_class=argparse.RawDescriptionHelpFormatter,
  2748. epilog="""
  2749. Examples:
  2750. # Scan all regions and services
  2751. python cloudshell_scanner.py
  2752. # Scan specific regions
  2753. python cloudshell_scanner.py --regions us-east-1,ap-northeast-1
  2754. # Specify output file
  2755. python cloudshell_scanner.py --output my_scan.json
  2756. # Scan specific services
  2757. python cloudshell_scanner.py --services ec2,vpc,rds
  2758. # Combine options
  2759. python cloudshell_scanner.py --regions us-east-1 --services ec2,vpc --output scan.json
  2760. """,
  2761. )
  2762. parser.add_argument(
  2763. "--regions",
  2764. type=str,
  2765. default=None,
  2766. help="Comma-separated list of AWS regions to scan (default: all regions)",
  2767. )
  2768. parser.add_argument(
  2769. "--output",
  2770. type=str,
  2771. default="scan_result.json",
  2772. help="Output JSON file path (default: scan_result.json)",
  2773. )
  2774. parser.add_argument(
  2775. "--services",
  2776. type=str,
  2777. default=None,
  2778. help="Comma-separated list of services to scan (default: all services)",
  2779. )
  2780. parser.add_argument(
  2781. "--version",
  2782. action="version",
  2783. version=f"CloudShell Scanner v{__version__}",
  2784. )
  2785. parser.add_argument(
  2786. "--verbose",
  2787. "-v",
  2788. action="store_true",
  2789. help="Enable verbose logging",
  2790. )
  2791. parser.add_argument(
  2792. "--list-services",
  2793. action="store_true",
  2794. help="List all supported services and exit",
  2795. )
  2796. return parser.parse_args()
  2797. def main() -> int:
  2798. """
  2799. Main entry point for the CloudShell Scanner.
  2800. Returns:
  2801. Exit code (0 for success, non-zero for failure)
  2802. """
  2803. args = parse_arguments()
  2804. # Set logging level
  2805. if args.verbose:
  2806. logging.getLogger().setLevel(logging.DEBUG)
  2807. logger.debug("Verbose logging enabled")
  2808. # List services and exit if requested
  2809. if args.list_services:
  2810. print("Supported services:")
  2811. for service in CloudShellScanner.SUPPORTED_SERVICES:
  2812. global_marker = " (global)" if service in CloudShellScanner.GLOBAL_SERVICES else ""
  2813. print(f" - {service}{global_marker}")
  2814. return 0
  2815. # Parse regions
  2816. regions: Optional[List[str]] = None
  2817. if args.regions:
  2818. regions = [r.strip() for r in args.regions.split(",")]
  2819. logger.info(f"Regions specified: {regions}")
  2820. # Parse services
  2821. services: Optional[List[str]] = None
  2822. if args.services:
  2823. services = [s.strip() for s in args.services.split(",")]
  2824. logger.info(f"Services specified: {services}")
  2825. try:
  2826. # Initialize scanner
  2827. print(f"CloudShell Scanner v{__version__}")
  2828. print("=" * 50)
  2829. scanner = CloudShellScanner()
  2830. # Get account info
  2831. account_id = scanner.get_account_id()
  2832. print(f"AWS Account: {account_id}")
  2833. print("=" * 50)
  2834. # Run scan
  2835. result = scanner.scan_resources(regions=regions, services=services)
  2836. # Export results
  2837. scanner.export_json(result, args.output)
  2838. # Print summary
  2839. print("\n" + "=" * 50)
  2840. print("Scan Summary:")
  2841. print(f" Account ID: {result['metadata']['account_id']}")
  2842. print(f" Regions scanned: {len(result['metadata']['regions_scanned'])}")
  2843. print(f" Services scanned: {len(result['metadata']['services_scanned'])}")
  2844. print(f" Total resources: {result['metadata']['total_resources']}")
  2845. print(f" Total errors: {result['metadata']['total_errors']}")
  2846. print(f" Output file: {args.output}")
  2847. print("=" * 50)
  2848. return 0
  2849. except KeyboardInterrupt:
  2850. print("\n\nScan interrupted by user")
  2851. return 130
  2852. except Exception as e:
  2853. logger.error(f"Scan failed: {e}")
  2854. if args.verbose:
  2855. import traceback
  2856. traceback.print_exc()
  2857. return 1
  2858. if __name__ == "__main__":
  2859. sys.exit(main())