4 files changed, 86 insertions, 24 deletions
diff --git a/discord/enums.py b/discord/enums.py
index f7df9059..f11f1ed1 100644
--- a/discord/enums.py
+++ b/discord/enums.py
@@ -26,10 +26,10 @@ DEALINGS IN THE SOFTWARE.
 
 from enum import Enum, IntEnum
 
-__all__ = ['ChannelType', 'MessageType', 'VoiceRegion', 'VerificationLevel',
-           'ContentFilter', 'Status', 'DefaultAvatar', 'RelationshipType',
-           'AuditLogAction', 'AuditLogActionCategory', 'UserFlags',
-           'ActivityType', 'HypeSquadHouse', 'NotificationLevel']
+__all__ = ['ChannelType', 'MessageType', 'VoiceRegion', 'SpeakingState',
+           'VerificationLevel', 'ContentFilter', 'Status', 'DefaultAvatar',
+           'RelationshipType', 'AuditLogAction', 'AuditLogActionCategory',
+           'UserFlags', 'ActivityType', 'HypeSquadHouse', 'NotificationLevel']
 
 class ChannelType(Enum):
     text     = 0
@@ -75,6 +75,15 @@ class VoiceRegion(Enum):
     def __str__(self):
         return self.value
 
+class SpeakingState(IntEnum):  # sent as the 'speaking' field of the voice SPEAKING payload via int(state)
+    none       = 0
+    voice      = 1
+    soundshare = 2
+    priority   = 4  # NOTE(review): 1/2/4 look like bit flags; `|` on IntEnum members yields a plain int - confirm IntFlag isn't wanted
+
+    def __str__(self):
+        return self.name  # e.g. str(SpeakingState.voice) == 'voice'
+
 class VerificationLevel(IntEnum):
     none              = 0
     low               = 1
diff --git a/discord/gateway.py b/discord/gateway.py
index eb17c2ef..83699a44 100644
--- a/discord/gateway.py
+++ b/discord/gateway.py
@@ -38,6 +38,7 @@ import websockets
 
 from . import utils
 from .activity import _ActivityTag
+from .enums import SpeakingState
 from .errors import ConnectionClosed, InvalidArgument
 
 log = logging.getLogger(__name__)
@@ -547,6 +548,10 @@ class DiscordVoiceWebSocket(websockets.client.WebSocketClientProtocol):
         Receive only. Tells you that your websocket connection was acknowledged.
     INVALIDATE_SESSION
         Sent only. Tells you that your RESUME request has failed and to re-IDENTIFY.
+    CLIENT_CONNECT
+        Indicates a user has connected to voice.
+    CLIENT_DISCONNECT
+        Receive only. Indicates a user has disconnected from voice.
     """
 
     IDENTIFY            = 0
@@ -559,6 +564,8 @@ class DiscordVoiceWebSocket(websockets.client.WebSocketClientProtocol):
     RESUME              = 7
     HELLO               = 8
     INVALIDATE_SESSION  = 9
+    CLIENT_CONNECT      = 12
+    CLIENT_DISCONNECT   = 13
 
     def __init__(self, *args, **kwargs):
         super().__init__(*args, **kwargs)
@@ -597,7 +604,7 @@ class DiscordVoiceWebSocket(websockets.client.WebSocketClientProtocol):
     @classmethod
     async def from_client(cls, client, *, resume=False):
         """Creates a voice websocket for the :class:`VoiceClient`."""
-        gateway = 'wss://' + client.endpoint + '/?v=3'
+        gateway = 'wss://' + client.endpoint + '/?v=4'
         ws = await websockets.connect(gateway, loop=client.loop, klass=cls, compression=None)
         ws.gateway = gateway
         ws._connection = client
@@ -610,7 +617,7 @@ class DiscordVoiceWebSocket(websockets.client.WebSocketClientProtocol):
 
         return ws
 
-    async def select_protocol(self, ip, port):
+    async def select_protocol(self, ip, port, mode):
         payload = {
             'op': self.SELECT_PROTOCOL,
             'd': {
@@ -618,18 +625,28 @@ class DiscordVoiceWebSocket(websockets.client.WebSocketClientProtocol):
                 'data': {
                     'address': ip,
                     'port': port,
-                    'mode': 'xsalsa20_poly1305'
+                    'mode': mode
                 }
             }
         }
 
         await self.send_as_json(payload)
 
-    async def speak(self, is_speaking=True):
+    async def client_connect(self):  # sends op CLIENT_CONNECT carrying this connection's audio SSRC
+        payload = {
+            'op': self.CLIENT_CONNECT,
+            'd': {
+                'audio_ssrc': self._connection.ssrc
+            }
+        }
+
+        await self.send_as_json(payload)
+
+    async def speak(self, state=SpeakingState.voice):
         payload = {
             'op': self.SPEAKING,
             'd': {
-                'speaking': is_speaking,
+                'speaking': int(state),
                 'delay': 0
             }
         }
@@ -642,9 +659,6 @@ class DiscordVoiceWebSocket(websockets.client.WebSocketClientProtocol):
         data = msg.get('d')
 
         if op == self.READY:
-            interval = data['heartbeat_interval'] / 1000.0
-            self._keep_alive = VoiceKeepAliveHandler(ws=self, interval=interval)
-            self._keep_alive.start()
             await self.initial_connection(data)
         elif op == self.HEARTBEAT_ACK:
             self._keep_alive.ack()
@@ -652,7 +666,12 @@ class DiscordVoiceWebSocket(websockets.client.WebSocketClientProtocol):
             log.info('Voice RESUME failed.')
             await self.identify()
         elif op == self.SESSION_DESCRIPTION:
+            self._connection.mode = data['mode']
             await self.load_secret_key(data)
+        elif op == self.HELLO:
+            interval = data['heartbeat_interval'] / 1000.0
+            self._keep_alive = VoiceKeepAliveHandler(ws=self, interval=interval)
+            self._keep_alive.start()
 
     async def initial_connection(self, data):
         state = self._connection
@@ -673,15 +692,23 @@ class DiscordVoiceWebSocket(websockets.client.WebSocketClientProtocol):
         # the port is a little endian unsigned short in the last two bytes
         # yes, this is different endianness from everything else
         state.port = struct.unpack_from('<H', recv, len(recv) - 2)[0]
-
         log.debug('detected ip: %s port: %s', state.ip, state.port)
-        await self.select_protocol(state.ip, state.port)
-        log.info('selected the voice protocol for use')
+
+        # there *should* always be at least one supported mode (xsalsa20_poly1305)
+        modes = [mode for mode in data['modes'] if mode in self._connection.supported_modes]
+        log.debug('received supported encryption modes: %s', ", ".join(modes))
+
+        mode = modes[0]
+        await self.select_protocol(state.ip, state.port, mode)
+        log.info('selected the voice protocol for use (%s)', mode)
+
+        await self.client_connect()
 
     async def load_secret_key(self, data):
         log.info('received secret key for voice connection')
         self._connection.secret_key = data.get('secret_key')
         await self.speak()
+        await self.speak(SpeakingState.none)  # immediately reset to "not speaking" (speaking=0)
 
     async def poll_event(self):
         try:
diff --git a/discord/player.py b/discord/player.py
index 74cd073f..8ee3bd13 100644
--- a/discord/player.py
+++ b/discord/player.py
@@ -27,6 +27,7 @@ DEALINGS IN THE SOFTWARE.
 import threading
 import subprocess
 import audioop
+import asyncio
 import logging
 import shlex
 import time
@@ -261,6 +262,7 @@ class AudioPlayer(threading.Thread):
 
         # getattr lookup speed ups
         play_audio = self.client.send_audio_packet
+        self._speak(True)
 
         while not self._end.is_set():
             # are we paused?
@@ -309,14 +311,19 @@ class AudioPlayer(threading.Thread):
     def stop(self):
         self._end.set()
         self._resumed.set()
+        self._speak(False)
 
-    def pause(self):
+    def pause(self, *, update_speaking=True):
         self._resumed.clear()
+        if update_speaking:
+            self._speak(False)
 
-    def resume(self):
+    def resume(self, *, update_speaking=True):
         self.loops = 0
         self._start = time.time()
         self._resumed.set()
+        if update_speaking:
+            self._speak(True)
 
     def is_playing(self):
         return self._resumed.is_set() and not self._end.is_set()
@@ -326,6 +333,12 @@ class AudioPlayer(threading.Thread):
 
     def _set_source(self, source):
         with self._lock:
-            self.pause()
+            self.pause(update_speaking=False)
             self.source = source
-            self.resume()
+            self.resume(update_speaking=False)
+
+    def _speak(self, speaking):  # schedules ws.speak() on the client's event loop; does not wait for the result
+        try:
+            asyncio.run_coroutine_threadsafe(self.client.ws.speak(speaking), self.client.loop)
+        except Exception as e:  # only errors raised while scheduling land here; the returned future is discarded
+            log.info("Speaking call in player failed: %s", e)
diff --git a/discord/voice_client.py b/discord/voice_client.py
index 91480872..6bd293c7 100644
--- a/discord/voice_client.py
+++ b/discord/voice_client.py
@@ -102,6 +102,7 @@ class VoiceClient:
         self._connected = threading.Event()
         self._handshake_complete = asyncio.Event(loop=self.loop)
 
+        self.mode = None
         self._connections = 0
         self.sequence = 0
         self.timestamp = 0
@@ -110,6 +111,10 @@ class VoiceClient:
         self.encoder = opus.Encoder()
 
     warn_nacl = not has_nacl
+    supported_modes = (
+        'xsalsa20_poly1305_suffix',
+        'xsalsa20_poly1305',
+    )
 
     @property
     def guild(self):
@@ -288,22 +293,30 @@ class VoiceClient:
 
     def _get_voice_packet(self, data):
         header = bytearray(12)
-        nonce = bytearray(24)
-        box = nacl.secret.SecretBox(bytes(self.secret_key))
 
-        # Formulate header
+        # Formulate rtp header
         header[0] = 0x80
         header[1] = 0x78
         struct.pack_into('>H', header, 2, self.sequence)
         struct.pack_into('>I', header, 4, self.timestamp)
         struct.pack_into('>I', header, 8, self.ssrc)
 
-        # Copy header to nonce's first 12 bytes
+        encrypt_packet = getattr(self, '_encrypt_' + self.mode)
+        return encrypt_packet(header, data)
+
+    def _encrypt_xsalsa20_poly1305(self, header, data):
+        box = nacl.secret.SecretBox(bytes(self.secret_key))
+        nonce = bytearray(24)
         nonce[:12] = header
 
-        # Encrypt and return the data
         return header + box.encrypt(bytes(data), bytes(nonce)).ciphertext
 
+    def _encrypt_xsalsa20_poly1305_suffix(self, header, data):  # packet = header + ciphertext + random nonce
+        box = nacl.secret.SecretBox(bytes(self.secret_key))
+        nonce = nacl.utils.random(nacl.secret.SecretBox.NONCE_SIZE)
+
+        return header + box.encrypt(bytes(data), nonce).ciphertext + nonce
+
     def play(self, source, *, after=None):
         """Plays an :class:`AudioSource`.