download.go 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417
  1. // Copyright (c) 2021 Tulir Asokan
  2. //
  3. // This Source Code Form is subject to the terms of the Mozilla Public
  4. // License, v. 2.0. If a copy of the MPL was not distributed with this
  5. // file, You can obtain one at http://mozilla.org/MPL/2.0/.
  6. package whatsmeow
  7. import (
  8. "context"
  9. "crypto/hmac"
  10. "crypto/sha256"
  11. "encoding/base64"
  12. "errors"
  13. "fmt"
  14. "io"
  15. "net"
  16. "net/http"
  17. "strings"
  18. "time"
  19. "go.mau.fi/util/retryafter"
  20. "google.golang.org/protobuf/proto"
  21. "google.golang.org/protobuf/reflect/protoreflect"
  22. "git.bobomao.top/joey/testwh/proto/waE2E"
  23. "git.bobomao.top/joey/testwh/proto/waHistorySync"
  24. "git.bobomao.top/joey/testwh/proto/waMediaTransport"
  25. "git.bobomao.top/joey/testwh/proto/waServerSync"
  26. "git.bobomao.top/joey/testwh/socket"
  27. "git.bobomao.top/joey/testwh/util/cbcutil"
  28. "git.bobomao.top/joey/testwh/util/hkdfutil"
  29. )
  30. // MediaType represents a type of uploaded file on WhatsApp.
  31. // The value is the key which is used as a part of generating the encryption keys.
  32. type MediaType string
  33. // The known media types
  34. const (
  35. MediaImage MediaType = "WhatsApp Image Keys"
  36. MediaVideo MediaType = "WhatsApp Video Keys"
  37. MediaAudio MediaType = "WhatsApp Audio Keys"
  38. MediaDocument MediaType = "WhatsApp Document Keys"
  39. MediaHistory MediaType = "WhatsApp History Keys"
  40. MediaAppState MediaType = "WhatsApp App State Keys"
  41. MediaStickerPack MediaType = "WhatsApp Sticker Pack Keys"
  42. MediaLinkThumbnail MediaType = "WhatsApp Link Thumbnail Keys"
  43. )
  44. // DownloadableMessage represents a protobuf message that contains attachment info.
  45. //
  46. // All of the downloadable messages inside a Message struct implement this interface
  47. // (ImageMessage, VideoMessage, AudioMessage, DocumentMessage, StickerMessage).
  48. type DownloadableMessage interface {
  49. GetDirectPath() string
  50. GetMediaKey() []byte
  51. GetFileSHA256() []byte
  52. GetFileEncSHA256() []byte
  53. }
  54. type MediaTypeable interface {
  55. GetMediaType() MediaType
  56. }
  57. // DownloadableThumbnail represents a protobuf message that contains a thumbnail attachment.
  58. //
  59. // This is primarily meant for link preview thumbnails in ExtendedTextMessage.
  60. type DownloadableThumbnail interface {
  61. proto.Message
  62. GetThumbnailDirectPath() string
  63. GetThumbnailSHA256() []byte
  64. GetThumbnailEncSHA256() []byte
  65. GetMediaKey() []byte
  66. }
  67. // All the message types that are intended to be downloadable
  68. var (
  69. _ DownloadableMessage = (*waE2E.ImageMessage)(nil)
  70. _ DownloadableMessage = (*waE2E.AudioMessage)(nil)
  71. _ DownloadableMessage = (*waE2E.VideoMessage)(nil)
  72. _ DownloadableMessage = (*waE2E.DocumentMessage)(nil)
  73. _ DownloadableMessage = (*waE2E.StickerMessage)(nil)
  74. _ DownloadableMessage = (*waE2E.StickerPackMessage)(nil)
  75. _ DownloadableMessage = (*waHistorySync.StickerMetadata)(nil)
  76. _ DownloadableMessage = (*waE2E.HistorySyncNotification)(nil)
  77. _ DownloadableMessage = (*waServerSync.ExternalBlobReference)(nil)
  78. _ DownloadableThumbnail = (*waE2E.ExtendedTextMessage)(nil)
  79. )
  80. type downloadableMessageWithLength interface {
  81. DownloadableMessage
  82. GetFileLength() uint64
  83. }
  84. type downloadableMessageWithSizeBytes interface {
  85. DownloadableMessage
  86. GetFileSizeBytes() uint64
  87. }
  88. type downloadableMessageWithURL interface {
  89. DownloadableMessage
  90. GetURL() string
  91. }
  92. var classToMediaType = map[protoreflect.Name]MediaType{
  93. "ImageMessage": MediaImage,
  94. "AudioMessage": MediaAudio,
  95. "VideoMessage": MediaVideo,
  96. "DocumentMessage": MediaDocument,
  97. "StickerMessage": MediaImage,
  98. "StickerMetadata": MediaImage,
  99. "StickerPackMessage": MediaStickerPack,
  100. "HistorySyncNotification": MediaHistory,
  101. "ExternalBlobReference": MediaAppState,
  102. }
  103. var classToThumbnailMediaType = map[protoreflect.Name]MediaType{
  104. "ExtendedTextMessage": MediaLinkThumbnail,
  105. }
  106. var mediaTypeToMMSType = map[MediaType]string{
  107. MediaImage: "image",
  108. MediaAudio: "audio",
  109. MediaVideo: "video",
  110. MediaDocument: "document",
  111. MediaHistory: "md-msg-hist",
  112. MediaAppState: "md-app-state",
  113. MediaStickerPack: "sticker-pack",
  114. MediaLinkThumbnail: "thumbnail-link",
  115. }
  116. // DownloadAny loops through the downloadable parts of the given message and downloads the first non-nil item.
  117. //
  118. // Deprecated: it's recommended to find the specific message type you want to download manually and use the Download method instead.
  119. func (cli *Client) DownloadAny(ctx context.Context, msg *waE2E.Message) (data []byte, err error) {
  120. if msg == nil {
  121. return nil, ErrNothingDownloadableFound
  122. }
  123. switch {
  124. case msg.ImageMessage != nil:
  125. return cli.Download(ctx, msg.ImageMessage)
  126. case msg.VideoMessage != nil:
  127. return cli.Download(ctx, msg.VideoMessage)
  128. case msg.AudioMessage != nil:
  129. return cli.Download(ctx, msg.AudioMessage)
  130. case msg.DocumentMessage != nil:
  131. return cli.Download(ctx, msg.DocumentMessage)
  132. case msg.StickerMessage != nil:
  133. return cli.Download(ctx, msg.StickerMessage)
  134. default:
  135. return nil, ErrNothingDownloadableFound
  136. }
  137. }
  138. func getSize(msg DownloadableMessage) int {
  139. switch sized := msg.(type) {
  140. case downloadableMessageWithLength:
  141. return int(sized.GetFileLength())
  142. case downloadableMessageWithSizeBytes:
  143. return int(sized.GetFileSizeBytes())
  144. default:
  145. return -1
  146. }
  147. }
  148. // ReturnDownloadWarnings controls whether the Download function returns non-fatal validation warnings.
  149. // Currently, these include [ErrFileLengthMismatch] and [ErrInvalidMediaSHA256].
  150. var ReturnDownloadWarnings = true
  151. // DownloadThumbnail downloads a thumbnail from a message.
  152. //
  153. // This is primarily intended for downloading link preview thumbnails, which are in ExtendedTextMessage:
  154. //
  155. // var msg *waE2E.Message
  156. // ...
  157. // thumbnailImageBytes, err := cli.DownloadThumbnail(msg.GetExtendedTextMessage())
  158. func (cli *Client) DownloadThumbnail(ctx context.Context, msg DownloadableThumbnail) ([]byte, error) {
  159. mediaType, ok := classToThumbnailMediaType[msg.ProtoReflect().Descriptor().Name()]
  160. if !ok {
  161. return nil, fmt.Errorf("%w '%s'", ErrUnknownMediaType, string(msg.ProtoReflect().Descriptor().Name()))
  162. } else if len(msg.GetThumbnailDirectPath()) > 0 {
  163. return cli.DownloadMediaWithPath(ctx, msg.GetThumbnailDirectPath(), msg.GetThumbnailEncSHA256(), msg.GetThumbnailSHA256(), msg.GetMediaKey(), -1, mediaType, mediaTypeToMMSType[mediaType])
  164. } else {
  165. return nil, ErrNoURLPresent
  166. }
  167. }
  168. // GetMediaType returns the MediaType value corresponding to the given protobuf message.
  169. func GetMediaType(msg DownloadableMessage) MediaType {
  170. protoReflecter, ok := msg.(proto.Message)
  171. if !ok {
  172. mediaTypeable, ok := msg.(MediaTypeable)
  173. if !ok {
  174. return ""
  175. }
  176. return mediaTypeable.GetMediaType()
  177. }
  178. return classToMediaType[protoReflecter.ProtoReflect().Descriptor().Name()]
  179. }
  180. // Download downloads the attachment from the given protobuf message.
  181. //
  182. // The attachment is a specific part of a Message protobuf struct, not the message itself, e.g.
  183. //
  184. // var msg *waE2E.Message
  185. // ...
  186. // imageData, err := cli.Download(msg.GetImageMessage())
  187. //
  188. // You can also use DownloadAny to download the first non-nil sub-message.
  189. func (cli *Client) Download(ctx context.Context, msg DownloadableMessage) ([]byte, error) {
  190. if cli == nil {
  191. return nil, ErrClientIsNil
  192. }
  193. mediaType := GetMediaType(msg)
  194. if mediaType == "" {
  195. return nil, fmt.Errorf("%w %T", ErrUnknownMediaType, msg)
  196. }
  197. urlable, ok := msg.(downloadableMessageWithURL)
  198. var url string
  199. var isWebWhatsappNetURL bool
  200. if ok {
  201. url = urlable.GetURL()
  202. isWebWhatsappNetURL = strings.HasPrefix(url, "https://web.whatsapp.net")
  203. }
  204. if len(url) > 0 && !isWebWhatsappNetURL {
  205. return cli.downloadAndDecrypt(ctx, url, msg.GetMediaKey(), mediaType, getSize(msg), msg.GetFileEncSHA256(), msg.GetFileSHA256())
  206. } else if len(msg.GetDirectPath()) > 0 {
  207. return cli.DownloadMediaWithPath(ctx, msg.GetDirectPath(), msg.GetFileEncSHA256(), msg.GetFileSHA256(), msg.GetMediaKey(), getSize(msg), mediaType, mediaTypeToMMSType[mediaType])
  208. } else {
  209. if isWebWhatsappNetURL {
  210. cli.Log.Warnf("Got a media message with a web.whatsapp.net URL (%s) and no direct path", url)
  211. }
  212. return nil, ErrNoURLPresent
  213. }
  214. }
  215. func (cli *Client) DownloadFB(
  216. ctx context.Context,
  217. transport *waMediaTransport.WAMediaTransport_Integral,
  218. mediaType MediaType,
  219. ) ([]byte, error) {
  220. return cli.DownloadMediaWithPath(ctx, transport.GetDirectPath(), transport.GetFileEncSHA256(), transport.GetFileSHA256(), transport.GetMediaKey(), -1, mediaType, mediaTypeToMMSType[mediaType])
  221. }
  222. // DownloadMediaWithPath downloads an attachment by manually specifying the path and encryption details.
  223. func (cli *Client) DownloadMediaWithPath(
  224. ctx context.Context,
  225. directPath string,
  226. encFileHash, fileHash, mediaKey []byte,
  227. fileLength int,
  228. mediaType MediaType,
  229. mmsType string,
  230. ) (data []byte, err error) {
  231. if !strings.HasPrefix(directPath, "/") {
  232. return nil, fmt.Errorf("media download path does not start with slash: %s", directPath)
  233. }
  234. var mediaConn *MediaConn
  235. mediaConn, err = cli.refreshMediaConn(ctx, false)
  236. if err != nil {
  237. return nil, fmt.Errorf("failed to refresh media connections: %w", err)
  238. }
  239. if len(mmsType) == 0 {
  240. mmsType = mediaTypeToMMSType[mediaType]
  241. }
  242. for i, host := range mediaConn.Hosts {
  243. // TODO omit hash for unencrypted media?
  244. mediaURL := fmt.Sprintf("https://%s%s&hash=%s&mms-type=%s&__wa-mms=", host.Hostname, directPath, base64.URLEncoding.EncodeToString(encFileHash), mmsType)
  245. data, err = cli.downloadAndDecrypt(ctx, mediaURL, mediaKey, mediaType, fileLength, encFileHash, fileHash)
  246. if err == nil ||
  247. errors.Is(err, ErrFileLengthMismatch) ||
  248. errors.Is(err, ErrInvalidMediaSHA256) ||
  249. errors.Is(err, ErrMediaDownloadFailedWith403) ||
  250. errors.Is(err, ErrMediaDownloadFailedWith404) ||
  251. errors.Is(err, ErrMediaDownloadFailedWith410) ||
  252. errors.Is(err, context.Canceled) {
  253. return
  254. } else if i >= len(mediaConn.Hosts)-1 {
  255. return nil, fmt.Errorf("failed to download media from last host: %w", err)
  256. }
  257. cli.Log.Warnf("Failed to download media: %s, trying with next host...", err)
  258. }
  259. return
  260. }
  261. func (cli *Client) downloadAndDecrypt(
  262. ctx context.Context,
  263. url string,
  264. mediaKey []byte,
  265. appInfo MediaType,
  266. fileLength int,
  267. fileEncSHA256,
  268. fileSHA256 []byte,
  269. ) (data []byte, err error) {
  270. iv, cipherKey, macKey, _ := getMediaKeys(mediaKey, appInfo)
  271. var ciphertext, mac []byte
  272. if ciphertext, mac, err = cli.downloadPossiblyEncryptedMediaWithRetries(ctx, url, fileEncSHA256); err != nil {
  273. } else if mediaKey == nil && fileEncSHA256 == nil && mac == nil {
  274. // Unencrypted media, just return the downloaded data
  275. data = ciphertext
  276. } else if err = validateMedia(iv, ciphertext, macKey, mac); err != nil {
  277. } else if data, err = cbcutil.Decrypt(cipherKey, iv, ciphertext); err != nil {
  278. err = fmt.Errorf("failed to decrypt file: %w", err)
  279. } else if ReturnDownloadWarnings {
  280. if fileLength >= 0 && len(data) != fileLength {
  281. err = fmt.Errorf("%w: expected %d, got %d", ErrFileLengthMismatch, fileLength, len(data))
  282. } else if len(fileSHA256) == 32 && sha256.Sum256(data) != *(*[32]byte)(fileSHA256) {
  283. err = ErrInvalidMediaSHA256
  284. }
  285. }
  286. return
  287. }
  288. func getMediaKeys(mediaKey []byte, appInfo MediaType) (iv, cipherKey, macKey, refKey []byte) {
  289. mediaKeyExpanded := hkdfutil.SHA256(mediaKey, nil, []byte(appInfo), 112)
  290. return mediaKeyExpanded[:16], mediaKeyExpanded[16:48], mediaKeyExpanded[48:80], mediaKeyExpanded[80:]
  291. }
  292. func shouldRetryMediaDownload(err error) bool {
  293. if errors.Is(err, context.Canceled) {
  294. return false
  295. }
  296. var netErr net.Error
  297. var httpErr DownloadHTTPError
  298. return errors.As(err, &netErr) ||
  299. strings.HasPrefix(err.Error(), "stream error:") || // hacky check for http2 errors
  300. (errors.As(err, &httpErr) && retryafter.Should(httpErr.StatusCode, true))
  301. }
  302. func (cli *Client) downloadPossiblyEncryptedMediaWithRetries(ctx context.Context, url string, checksum []byte) (file, mac []byte, err error) {
  303. for retryNum := 0; retryNum < 5; retryNum++ {
  304. if checksum == nil {
  305. file, err = cli.downloadMedia(ctx, url)
  306. } else {
  307. file, mac, err = cli.downloadEncryptedMedia(ctx, url, checksum)
  308. }
  309. if err == nil || !shouldRetryMediaDownload(err) {
  310. return
  311. }
  312. retryDuration := time.Duration(retryNum+1) * time.Second
  313. var httpErr DownloadHTTPError
  314. if errors.As(err, &httpErr) {
  315. retryDuration = retryafter.Parse(httpErr.Response.Header.Get("Retry-After"), retryDuration)
  316. }
  317. cli.Log.Warnf("Failed to download media due to network error: %v, retrying in %s...", err, retryDuration)
  318. select {
  319. case <-ctx.Done():
  320. return nil, nil, ctx.Err()
  321. case <-time.After(retryDuration):
  322. }
  323. }
  324. return
  325. }
  326. func (cli *Client) doMediaDownloadRequest(ctx context.Context, url string) (*http.Response, error) {
  327. req, err := http.NewRequestWithContext(ctx, http.MethodGet, url, nil)
  328. if err != nil {
  329. return nil, fmt.Errorf("failed to prepare request: %w", err)
  330. }
  331. req.Header.Set("Origin", socket.Origin)
  332. req.Header.Set("Referer", socket.Origin+"/")
  333. if cli.MessengerConfig != nil {
  334. req.Header.Set("User-Agent", cli.MessengerConfig.UserAgent)
  335. }
  336. // TODO user agent for whatsapp downloads?
  337. resp, err := cli.mediaHTTP.Do(req)
  338. if err != nil {
  339. return nil, err
  340. }
  341. if resp.StatusCode != http.StatusOK {
  342. _ = resp.Body.Close()
  343. return nil, DownloadHTTPError{Response: resp}
  344. }
  345. return resp, nil
  346. }
  347. func (cli *Client) downloadMedia(ctx context.Context, url string) ([]byte, error) {
  348. resp, err := cli.doMediaDownloadRequest(ctx, url)
  349. if err != nil {
  350. return nil, err
  351. }
  352. data, err := io.ReadAll(resp.Body)
  353. _ = resp.Body.Close()
  354. return data, err
  355. }
  356. const mediaHMACLength = 10
  357. func (cli *Client) downloadEncryptedMedia(ctx context.Context, url string, checksum []byte) (file, mac []byte, err error) {
  358. data, err := cli.downloadMedia(ctx, url)
  359. if err != nil {
  360. return
  361. } else if len(data) <= mediaHMACLength {
  362. err = ErrTooShortFile
  363. return
  364. }
  365. file, mac = data[:len(data)-mediaHMACLength], data[len(data)-mediaHMACLength:]
  366. if len(checksum) == 32 && sha256.Sum256(data) != *(*[32]byte)(checksum) {
  367. err = ErrInvalidMediaEncSHA256
  368. }
  369. return
  370. }
  371. func validateMedia(iv, file, macKey, mac []byte) error {
  372. h := hmac.New(sha256.New, macKey)
  373. h.Write(iv)
  374. h.Write(file)
  375. if !hmac.Equal(h.Sum(nil)[:mediaHMACLength], mac) {
  376. return ErrInvalidMediaHMAC
  377. }
  378. return nil
  379. }