self.timeout: int = self.rl_config.get("max_trade_duration_candles", 128)
self._last_closed_position: Positions = None
self._last_closed_trade_tick: int = 0
- self._non_profit_steps: int = 0
# self.reward_range = (-1, 1)
if self.force_actions:
logger.info(
self._force_action: Optional[ForceActions] = None
self._last_closed_position: Positions = None
self._last_closed_trade_tick: int = 0
- self._non_profit_steps: int = 0
return self._get_observation(), history
def _get_reward_factor_at_trade_exit(
)
)
- # discourage sitting in position
+ # pnl aware reward for sitting in position
if (
self._position in (Positions.Short, Positions.Long)
and action == Actions.Neutral.value
):
- if pnl < 0:
- self._non_profit_steps += 1
- return factor * (
- pnl - (0.1 * (self._non_profit_steps**2) * abs(pnl))
- ) # time aggressive (quadratic) and loss magnitude aware penalty
- else:
- self._non_profit_steps = 0
- return -1.0 * trade_duration / max_trade_duration
+ return (
+ factor * pnl * np.exp(-0.05 * trade_duration)
+ - trade_duration / max_trade_duration
+ )
# close long
if action == Actions.Long_exit.value and self._position == Positions.Long:
self._position = Positions.Neutral
self._last_closed_trade_tick = self._last_trade_tick
self._last_trade_tick = None
- self._non_profit_steps = 0
def execute_trade(self, action: int) -> None:
"""