# Risk-aware courier team for CS470 Assignment 3.

from captureAgents import CaptureAgent
import heapq
import random
from game import Directions
from util import nearestPoint


def createTeam(firstIndex, secondIndex, isRed,
               first='RiskAwareCourierAgent',
               second='CourierDefensiveAgent'):
  """Build the two-agent team as a list.

  `first` and `second` are strings that are evaluated to obtain callables;
  each callable is invoked with its agent index. `isRed` is accepted but not
  used by this function.
  """
  # NOTE: eval executes arbitrary expressions, so only trusted names should
  # ever be passed for `first` / `second`.
  firstAgent = eval(first)(firstIndex)
  secondAgent = eval(second)(secondIndex)
  return [firstAgent, secondAgent]


class TargetAgent(CaptureAgent):
  """Common base for both team agents.

  Caches layout facts at registration time and offers maze-distance helpers.
  Subclasses provide the actual scoring through evaluateAction.
  """

  def registerInitialState(self, gameState):
    """Cache spawn cell and board size, run the base setup, then find home entries."""
    board = gameState.data.layout
    self.start = gameState.getAgentPosition(self.index)
    self.width, self.height = board.width, board.height
    # Boards at most 20 wide and 7 tall are flagged as compact.
    self.compactLayout = self.width <= 20 and self.height <= 7
    CaptureAgent.registerInitialState(self, gameState)
    # Computed after the base setup: getHomeEntries reads self.red, which is
    # not assigned anywhere in this class (presumably set by CaptureAgent —
    # confirm against the framework).
    self.homeEntries = self.getHomeEntries(gameState)

  def chooseAction(self, gameState):
    """Return a random best-scoring legal action, avoiding STOP on ties."""
    legal = gameState.getLegalActions(self.index)
    scored = [(self.evaluateAction(gameState, move), move) for move in legal]
    topScore = max(score for score, _ in scored)
    candidates = [move for score, move in scored if score == topScore]
    # STOP is dropped only when some other action ties with it.
    if len(candidates) > 1 and Directions.STOP in candidates:
      candidates.remove(Directions.STOP)
    return random.choice(candidates)

  def getSuccessor(self, gameState, action):
    """Return the state after `action`, applied a second time if the agent
    ends up off a grid point after the first application."""
    nextState = gameState.generateSuccessor(self.index, action)
    landed = nextState.getAgentState(self.index).getPosition()
    if landed == nearestPoint(landed):
      return nextState
    return nextState.generateSuccessor(self.index, action)

  def getHomeEntries(self, gameState):
    """List the open cells of the home-side column next to the midline.

    Red teams use column width // 2 - 1, others width // 2. The top and
    bottom rows are skipped. Falls back to the spawn cell when the column
    has no open cell.
    """
    walls = gameState.getWalls()
    middle = self.width // 2
    column = middle - 1 if self.red else middle
    openCells = [
      (column, row)
      for row in range(1, self.height - 1)
      if not walls[column][row]
    ]
    if openCells:
      return openCells
    return [self.start]

  def minDistance(self, pos, targets):
    """Return the smallest maze distance from pos to any target.

    Returns 0 when pos is None or targets is empty.
    """
    if pos is None or not targets:
      return 0
    distances = [self.getMazeDistance(pos, goal) for goal in targets]
    return min(distances)

  def closestTarget(self, pos, targets):
    """Return the target with the smallest maze distance from pos, or None
    when targets is empty."""
    if not targets:
      return None

    def distanceFrom(goal):
      return self.getMazeDistance(pos, goal)

    return min(targets, key=distanceFrom)

  def activeGhostDistances(self, gameState, pos):
    """Return maze distances from pos to each visible, dangerous enemy ghost.

    An opponent counts when its position is known, it is not a Pacman, and
    its scared timer is at most 1. Returns an empty list when pos is None.
    """
    found = []
    if pos is None:
      return found
    for index in self.getOpponents(gameState):
      ghost = gameState.getAgentState(index)
      where = ghost.getPosition()
      threatening = (
        where is not None and not ghost.isPacman and ghost.scaredTimer <= 1
      )
      if threatening:
        found.append(self.getMazeDistance(pos, where))
    return found

  def evaluateAction(self, gameState, action):
    """Neutral default score; subclasses override this."""
    return 0


class RiskAwareCourierAgent(TargetAgent):
  """Food-collecting agent.

  Each turn it picks a target cell (food, capsule, or a home entry), plans a
  ghost-averse path to it, and takes the first step of that path. When there
  is no plan or its first step looks unsafe, it falls back to one-step greedy
  scoring.
  """

  # Scoring weights; negative values act as penalties.
  carryHomeWeight = -5
  ghostClosePenalty = -2000
  ghostDangerRadius = 5
  scoreWeight = 10000
  targetDistanceWeight = -20
  returnHomeWeight = -12   # per step of home distance while returning
  stopPenalty = -500       # applied to Directions.STOP
  reversePenalty = -5      # applied to reversing the current heading
  deathPenalty = -8000     # applied when likelyDied reports a death

  def chooseAction(self, gameState):
    """Return the planned first step when it is safe, else the greedy choice."""
    target, returning = self.chooseTarget(gameState)
    if target is None:
      myPos = gameState.getAgentState(self.index).getPosition()
      target = self.closestTarget(myPos, self.homeEntries)

    plannedAction = self.plannedAction(gameState, target)
    if plannedAction is not None and not self.badFirstStep(gameState, plannedAction):
      return plannedAction

    return self.greedyFallback(gameState, target, returning)

  def evaluateAction(self, gameState, action):
    """Score one action from its successor state.

    Not called by this class's own chooseAction, which uses greedyFallback.
    Unlike greedyFallback, the ghost penalty here applies whether or not the
    agent is a Pacman, and carried food adds a home-distance term.
    """
    successor = self.getSuccessor(gameState, action)
    oldState = gameState.getAgentState(self.index)
    newState = successor.getAgentState(self.index)
    myPos = newState.getPosition()

    target, returning = self.chooseTarget(gameState)
    if target is None:
      target = self.closestTarget(myPos, self.homeEntries)

    value = self.scoreWeight * self.getScore(successor)
    value += self.targetDistanceWeight * self.getMazeDistance(myPos, target)

    homeDistance = self.minDistance(myPos, self.homeEntries)
    if newState.numCarrying > 0:
      value += self.carryHomeWeight * newState.numCarrying * homeDistance
    if returning:
      value += self.returnHomeWeight * homeDistance

    value += self._ghostPenalty(successor, myPos)
    reverse = Directions.REVERSE[oldState.configuration.direction]
    value += self._stepPenalty(gameState, successor, action, reverse)
    return value

  def _ghostPenalty(self, gameState, pos):
    """Return the penalty for the closest active ghost within the danger radius, else 0."""
    distances = self.activeGhostDistances(gameState, pos)
    if not distances:
      return 0
    closest = min(distances)
    if closest > self.ghostDangerRadius:
      return 0
    # Grows linearly as the ghost gets closer.
    return self.ghostClosePenalty * (self.ghostDangerRadius + 1 - closest)

  def _stepPenalty(self, gameState, successor, action, reverse):
    """Return the summed penalties for stopping, reversing and a likely death."""
    penalty = 0
    if action == Directions.STOP:
      penalty += self.stopPenalty
    if action == reverse:
      penalty += self.reversePenalty
    if self.likelyDied(gameState, successor):
      penalty += self.deathPenalty
    return penalty

  def plannedAction(self, gameState, target):
    """Return the first move of a risk-weighted A* path to target, or None.

    None is returned when the start or target is unknown, when the agent
    already stands on the target, when no path is found, or when the push
    budget (4 * width * height) is used up first.
    """
    start = gameState.getAgentState(self.index).getPosition()
    if start is None or target is None:
      return None
    start = nearestPoint(start)
    target = nearestPoint(target)
    if start == target:
      return None

    walls = gameState.getWalls()
    ghosts = self.activeGhostPositions(gameState)
    budget = self.width * self.height * 4
    # counter is unique per heap entry, so tuple comparison is always settled
    # before it reaches the position or action fields.
    counter = 0
    bestCost = {start: 0}
    frontier = []
    heapq.heappush(frontier, (self.getMazeDistance(start, target), 0, counter, start, None))

    while frontier and counter < budget:
      _, cost, _, pos, firstAction = heapq.heappop(frontier)
      if cost != bestCost.get(pos):
        # Stale entry: a cheaper route to pos was pushed after this one.
        continue
      if pos == target:
        return firstAction

      for action, nextPos in self.neighborPositions(pos, walls):
        nextCost = cost + 1 + self.plannerRisk(nextPos, ghosts)
        if nextCost >= bestCost.get(nextPos, 999999):
          continue
        counter += 1
        bestCost[nextPos] = nextCost
        # Carry the very first move of the path along with every entry.
        first = action if firstAction is None else firstAction
        heuristic = self.getMazeDistance(nextPos, target)
        heapq.heappush(frontier, (nextCost + heuristic, nextCost, counter, nextPos, first))
    return None

  def neighborPositions(self, pos, walls):
    """Return (action, cell) pairs for the in-bounds, non-wall neighbours of pos."""
    x, y = pos
    candidates = [
      (Directions.NORTH, (x, y + 1)),
      (Directions.SOUTH, (x, y - 1)),
      (Directions.EAST, (x + 1, y)),
      (Directions.WEST, (x - 1, y)),
    ]
    result = []
    for action, nextPos in candidates:
      nx, ny = nextPos
      if nx < 0 or nx >= self.width or ny < 0 or ny >= self.height:
        continue
      if not walls[nx][ny]:
        result.append((action, nextPos))
    return result

  def plannerRisk(self, pos, ghosts):
    """Return the extra path cost of stepping on pos, summed over all ghosts.

    Per ghost: 80 at distance <= 1, 30 at distance 2, and a linear ramp
    (5 per step) out to ghostDangerRadius; nothing beyond that.
    """
    if not ghosts:
      return 0
    risk = 0
    for ghostPos in ghosts:
      distance = self.getMazeDistance(pos, ghostPos)
      if distance <= 1:
        risk += 80
      elif distance <= 2:
        risk += 30
      elif distance <= self.ghostDangerRadius:
        risk += 5 * (self.ghostDangerRadius + 1 - distance)
    return risk

  def activeGhostPositions(self, gameState):
    """Return grid positions of visible opponents that are non-Pacman with scaredTimer <= 1."""
    positions = []
    for opponent in self.getOpponents(gameState):
      enemy = gameState.getAgentState(opponent)
      enemyPos = enemy.getPosition()
      if enemyPos is None:
        continue
      if not enemy.isPacman and enemy.scaredTimer <= 1:
        positions.append(nearestPoint(enemyPos))
    return positions

  def badFirstStep(self, gameState, action):
    """Return True when action should not be taken as the plan's first step.

    Rejects STOP, moves that look like a death, and moves that leave the
    agent as a Pacman within maze distance 1 of an active ghost.
    """
    if action == Directions.STOP:
      return True
    successor = self.getSuccessor(gameState, action)
    if self.likelyDied(gameState, successor):
      return True
    myState = successor.getAgentState(self.index)
    myPos = myState.getPosition()
    if not myState.isPacman:
      return False
    ghostDistances = self.activeGhostDistances(successor, myPos)
    return bool(ghostDistances and min(ghostDistances) <= 1)

  def greedyFallback(self, gameState, target, returning):
    """Pick a random best action by one-step scoring toward target.

    The ghost penalty is applied only when the successor leaves the agent
    as a Pacman. STOP is dropped from the best set when another action ties.
    """
    actions = gameState.getLegalActions(self.index)
    # The current heading does not depend on the candidate action, so it is
    # looked up once instead of once per action.
    reverse = Directions.REVERSE[gameState.getAgentState(self.index).configuration.direction]
    values = []
    for action in actions:
      successor = self.getSuccessor(gameState, action)
      myState = successor.getAgentState(self.index)
      myPos = myState.getPosition()
      value = self.scoreWeight * self.getScore(successor)
      if target is not None:
        value += self.targetDistanceWeight * self.getMazeDistance(myPos, target)
      if returning:
        value += self.returnHomeWeight * self.minDistance(myPos, self.homeEntries)
      if myState.isPacman:
        value += self._ghostPenalty(successor, myPos)
      value += self._stepPenalty(gameState, successor, action, reverse)
      values.append(value)

    bestValue = max(values)
    bestActions = [a for a, v in zip(actions, values) if v == bestValue]
    if Directions.STOP in bestActions and len(bestActions) > 1:
      bestActions.remove(Directions.STOP)
    return random.choice(bestActions)

  def chooseTarget(self, gameState):
    """Return (target, returning) for this turn.

    The nearest home entry is the target, with returning=True, when
    shouldReturnHome says so or when at most two food pellets remain.
    Otherwise the target is a capsule if chooseCapsuleTarget offers one,
    else the best round-trip food, both with returning=False.
    """
    myPos = gameState.getAgentState(self.index).getPosition()
    food = self.getFood(gameState).asList()
    home = self.closestTarget(myPos, self.homeEntries)

    # `len(food) <= 2` already covers the empty-food case.
    if self.shouldReturnHome(gameState) or len(food) <= 2:
      return home, True

    capsuleTarget = self.chooseCapsuleTarget(gameState, myPos)
    if capsuleTarget is not None:
      return capsuleTarget, False

    return self.bestRoundTripFood(gameState, myPos, food), False

  def shouldReturnHome(self, gameState):
    """Return True when carried food should be taken home now.

    Never True when nothing is carried. Otherwise True when the carry
    threshold is reached, when the agent is a Pacman with an active ghost
    inside the danger radius, or when little time is left.
    """
    myState = gameState.getAgentState(self.index)
    myPos = myState.getPosition()
    carrying = myState.numCarrying
    if carrying <= 0:
      return False

    if carrying >= self.carryThreshold(gameState):
      return True

    ghostDistances = self.activeGhostDistances(gameState, myPos)
    if myState.isPacman and ghostDistances and min(ghostDistances) <= self.ghostDangerRadius:
      return True

    homeDistance = self.minDistance(myPos, self.homeEntries)
    # NOTE(review): the factor 4 presumably converts this agent's own steps
    # into timeleft ticks shared by four agents — confirm against the engine.
    timeToBank = 4 * (homeDistance + 5)
    return gameState.data.timeleft <= max(60, timeToBank)

  def carryThreshold(self, gameState):
    """Return how many pellets to carry before heading home.

    1 on compact layouts; otherwise 2 when the score is non-negative and
    3 when it is negative.
    """
    if self.compactLayout:
      return 1
    if self.getScore(gameState) >= 0:
      return 2
    return 3

  def bestRoundTripFood(self, gameState, pos, food):
    """Return the food cell with the lowest round-trip cost.

    Cost is distance to the food, plus the food's distance to the nearest
    home entry, plus its ghost risk. Ties are broken by lower risk, then by
    shorter outbound distance.
    """
    ghosts = self.activeGhostPositions(gameState)

    def tripKey(target):
      # Each term is computed once per candidate and reused in the tie-breakers.
      outbound = self.getMazeDistance(pos, target)
      risk = self.foodRisk(target, ghosts)
      total = outbound + self.minDistance(target, self.homeEntries) + risk
      return (total, risk, outbound)

    return min(food, key=tripKey)

  def foodRisk(self, target, ghosts):
    """Return the risk cost of target based on its closest ghost.

    80 at distance <= 1, 35 at distance 2, a linear ramp (6 per step) out
    to ghostDangerRadius, and 0 beyond that or when there are no ghosts.
    """
    if not ghosts:
      return 0
    closestGhost = min(self.getMazeDistance(target, ghost) for ghost in ghosts)
    if closestGhost <= 1:
      return 80
    if closestGhost <= 2:
      return 35
    if closestGhost <= self.ghostDangerRadius:
      return 6 * (self.ghostDangerRadius + 1 - closestGhost)
    return 0

  def chooseCapsuleTarget(self, gameState, pos):
    """Return a capsule to head for, or None.

    A capsule is offered only while the agent is a Pacman and an active
    ghost is within maze distance 7. The capsule with the lowest
    (distance to it + its distance to home) wins; ties go to the nearer one.
    """
    capsules = self.getCapsules(gameState)
    if not capsules:
      return None
    myState = gameState.getAgentState(self.index)
    if not myState.isPacman:
      return None
    ghostDistances = self.activeGhostDistances(gameState, pos)
    if not ghostDistances or min(ghostDistances) > 7:
      return None

    def capsuleKey(target):
      outbound = self.getMazeDistance(pos, target)
      return (outbound + self.minDistance(target, self.homeEntries), outbound)

    return min(capsules, key=capsuleKey)

  def likelyDied(self, gameState, successor):
    """Guess whether moving from gameState to successor killed this agent."""
    oldState = gameState.getAgentState(self.index)
    newState = successor.getAgentState(self.index)
    oldPos = oldState.getPosition()
    newPos = newState.getPosition()
    # A Pacman that suddenly appears on its spawn cell was sent back there.
    if oldState.isPacman and newPos == self.start and oldPos != self.start:
      return True
    # Carried food vanished: treat it as a death unless the score went up.
    if oldState.numCarrying > 0 and newState.numCarrying == 0:
      return self.getScore(successor) <= self.getScore(gameState)
    return False


class CourierDefensiveAgent(TargetAgent):
  """Defensive agent.

  Each turn it chooses one cell to move toward: a visible invader, the home
  entry nearest that invader, the last place defended food disappeared, or
  a fixed patrol post.
  """

  def registerInitialState(self, gameState):
    """Run the base setup and initialise the patrol and tracking state."""
    TargetAgent.registerInitialState(self, gameState)
    self.patrolTarget = self.choosePatrolTarget(gameState)
    self.lastEatenFood = None
    self.currentTarget = self.patrolTarget
    self.holdingPortal = False
    self.chasingInvader = False

  def chooseAction(self, gameState):
    """Refresh the defence target, then pick the best-scoring action."""
    self.updateLastEatenFood(gameState)
    self.currentTarget = self.chooseDefenseTarget(gameState)
    return TargetAgent.chooseAction(self, gameState)

  def chooseDefenseTarget(self, gameState):
    """Return the cell to move toward this turn.

    Also resets and then sets the holdingPortal / chasingInvader flags that
    evaluateAction reads. With a visible invader the agent either chases
    it or holds the home entry nearest to it. Without one it heads for
    the last eaten food if any, else the patrol post.
    """
    self.holdingPortal = False
    self.chasingInvader = False
    invader = self.primaryInvader(gameState)

    if invader is None:
      if self.lastEatenFood is not None:
        return self.lastEatenFood
      if self.patrolTarget is None:
        self.patrolTarget = self.choosePatrolTarget(gameState)
      return self.patrolTarget

    myPos = gameState.getAgentPosition(self.index)
    invaderPos = invader.getPosition()
    portal = self.nearestHomeEntry(invaderPos)
    distToInvader = self.getMazeDistance(myPos, invaderPos)
    invaderToPortal = self.getMazeDistance(invaderPos, portal)
    catchSoon = distToInvader <= 2 or distToInvader <= invaderToPortal
    pressureChase = invader.numCarrying < 2 and distToInvader <= invaderToPortal + 2

    if catchSoon or pressureChase:
      self.chasingInvader = True
      return invaderPos

    # catchSoon being false means distToInvader > invaderToPortal, i.e. the
    # invader is closer to the entry than we are to the invader. The earlier
    # extra conditions (carry count, nearness to the midline) could never
    # change the outcome at this point, so the agent always holds the entry.
    self.holdingPortal = True
    return portal

  def evaluateAction(self, gameState, action):
    """Score one action: stay on the home side and close in on the current target."""
    successor = self.getSuccessor(gameState, action)
    myState = successor.getAgentState(self.index)
    myPos = myState.getPosition()
    invaders = self.visibleInvaders(successor)
    target = self.currentTarget or self.choosePatrolTarget(successor)

    value = 0
    if myState.isPacman:
      # Crossing onto the other side is heavily discouraged.
      value -= 1200

    targetDistance = self.getMazeDistance(myPos, target)
    value -= 12 * targetDistance

    if invaders:
      closestInvaderDistance = min(
        self.getMazeDistance(myPos, invader.getPosition()) for invader in invaders
      )
      if self.chasingInvader:
        value -= 10 * closestInvaderDistance
      if closestInvaderDistance <= 1 and myState.scaredTimer == 0:
        value += 500
      # While scared, keep more than two steps away from invaders.
      if myState.scaredTimer > 0 and closestInvaderDistance <= 2:
        value -= 250

    if action == Directions.STOP:
      # Standing still is nearly free only while holding an entry we are on.
      if self.holdingPortal and targetDistance == 0:
        value -= 1
      else:
        value -= 100
    reverse = Directions.REVERSE[gameState.getAgentState(self.index).configuration.direction]
    if action == reverse:
      value -= 2
    return value

  def primaryInvader(self, gameState):
    """Return the visible invader carrying the most food (nearest on ties), or None."""
    invaders = self.visibleInvaders(gameState)
    if not invaders:
      return None
    myPos = gameState.getAgentPosition(self.index)

    def priority(invader):
      return (invader.numCarrying, -self.getMazeDistance(myPos, invader.getPosition()))

    return max(invaders, key=priority)

  def nearestHomeEntry(self, pos):
    """Return the home entry with the smallest maze distance from pos."""
    return min(self.homeEntries, key=lambda entry: self.getMazeDistance(pos, entry))

  def visibleInvaders(self, gameState):
    """Return opponent states that are Pacmen with a known position."""
    enemies = [gameState.getAgentState(index) for index in self.getOpponents(gameState)]
    return [enemy for enemy in enemies if enemy.isPacman and enemy.getPosition() is not None]

  def updateLastEatenFood(self, gameState):
    """Track where defended food most recently disappeared.

    When food vanished since the previous observation, remember the vanished
    cell closest to this agent. When nothing vanished and the agent is
    standing on the remembered cell, forget it so chooseDefenseTarget goes
    back to the patrol post instead of staying on a stale target.
    """
    previous = self.getPreviousObservation()
    if previous is None:
      return
    previousFood = set(self.getFoodYouAreDefending(previous).asList())
    currentFood = set(self.getFoodYouAreDefending(gameState).asList())
    eaten = previousFood - currentFood
    myPos = gameState.getAgentPosition(self.index)
    if eaten:
      self.lastEatenFood = min(eaten, key=lambda pos: self.getMazeDistance(myPos, pos))
    elif self.lastEatenFood is not None and myPos == self.lastEatenFood:
      # The spot has been reached and nothing new was eaten.
      self.lastEatenFood = None

  def choosePatrolTarget(self, gameState):
    """Return the home entry closest (Manhattan) to the centroid of defended food.

    Returns a random home entry when no defended food is left.
    """
    defendingFood = self.getFoodYouAreDefending(gameState).asList()
    if not defendingFood:
      return random.choice(self.homeEntries)
    count = float(len(defendingFood))
    centerX = sum(x for x, _ in defendingFood) / count
    centerY = sum(y for _, y in defendingFood) / count
    return min(
      self.homeEntries,
      key=lambda pos: abs(pos[0] - centerX) + abs(pos[1] - centerY)
    )