Base64 encoding for string input.

[ Merge of http://go/wvgerrit/119805 ] This change adds 3 new functions for encoding binary data from a C++ string to a base64 encoded ASCII string. The CDM and protobuf generated code use C++ strings to store binary data. These binary strings are commonly converted into a base64 encoded ASCII string for logging and for returning to the app. This change also cleans up some of the internal components of the string_conversions library to use several standard library C++11 methods. Bug: 181732604 Test: CE CDM unittests Change-Id: I547568c6402e011344260f2df2a06e972122ab8a
2021-03-12 19:27:51 -08:00
parent 30ebbefb40
commit e51f869190
4 changed files with 312 additions and 213 deletions
--- a/libwvdrmengine/cdm/util/src/string_conversions.cpp
+++ b/libwvdrmengine/cdm/util/src/string_conversions.cpp
@@ -10,15 +10,18 @@
 #include <string.h>

 #include <iostream>
-#include <vector>

 #include "log.h"
 #include "platform.h"

 namespace wvcdm {
-
-static const char kBase64Codes[] =
+namespace {
+// Base64 character set, indexed for their 6-bit mapping, plus '='.
+const char kBase64Codes[] =
    "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/=";
+// URL safe Base64 character set.
+const char kBase64SafeCodes[] =
+    "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_=";

 // Gets the low |n| bits of |in|.
 #define GET_LOW_BITS(in, n) ((in) & ((1 << (n)) - 1))
@@ -27,26 +30,131 @@ static const char kBase64Codes[] =
 // Calculates a/b using round-up division (only works for positive numbers).
 #define CEIL_DIVIDE(a, b) ((((a)-1) / (b)) + 1)

-int DecodeBase64Char(char c) {
-  const char* it = strchr(kBase64Codes, c);
+// Decodes a single Base64 character into its index in |codes|, or returns -1
+// if absent. NUL is rejected: strchr() would match the string terminator.
+int DecodeBase64Char(char c, const char* codes) {
+  const char* it = (c == '\0') ? nullptr : strchr(codes, c);
  if (it == nullptr) return -1;
-  return it - kBase64Codes;
+  return it - codes;
 }

-bool DecodeHexChar(char ch, unsigned char* digit) {
+bool DecodeHexChar(char ch, uint8_t* digit) {
  if (ch >= '0' && ch <= '9') {
    *digit = ch - '0';
-  } else {
-    ch = tolower(ch);
-    if ((ch >= 'a') && (ch <= 'f')) {
-      *digit = ch - 'a' + 10;
-    } else {
-      return false;
+    return true;
+  }
+  ch = tolower(static_cast<unsigned char>(ch));  // negative char is UB
+  if ((ch >= 'a') && (ch <= 'f')) {
+    *digit = ch - 'a' + 10;
+    return true;
+  }
+  return false;
+}
+
+// Encode for standard base64 encoding (RFC4648).
+// https://en.wikipedia.org/wiki/Base64
+// Text    |       M        |       a       |       n        |
+// ASCII   |   77 (0x4d)    |   97 (0x61)   |   110 (0x6e)   |
+// Bits    | 0 1 0 0 1 1 0 1 0 1 1 0 0 0 0 1 0 1 1 0 1 1 1 0 |
+// Index   |     19     |     22    |      5    |     46     |
+// Base64  |      T     |      W    |      F    |      u     |
+//         | <-----------------  24-bits  -----------------> |
+
+// The provided |codes| must be a Base64 character map.
+std::string Base64EncodeInternal(const uint8_t* data, size_t length,
+                                 const char* codes) {
+  // |temp| stores a 24-bit block that is treated as an array where insertions
+  // occur from high to low.
+  uint32_t temp = 0;
+  size_t out_index = 0;
+  const size_t out_size = ((length + 2) / 3) * 4;  // safe for length == 0
+  std::string result(out_size, '\0');
+  for (size_t i = 0; i < length; i++) {
+    // "insert" 8-bits of data
+    temp |= (data[i] << ((2 - (i % 3)) * 8));
+    if (i % 3 == 2) {
+      result[out_index++] = codes[GET_BITS(temp, 18, 24)];
+      result[out_index++] = codes[GET_BITS(temp, 12, 18)];
+      result[out_index++] = codes[GET_BITS(temp, 6, 12)];
+      result[out_index++] = codes[GET_BITS(temp, 0, 6)];
+      temp = 0;
    }
  }
-  return true;
+  if (length % 3 == 1) {
+    result[out_index++] = codes[GET_BITS(temp, 18, 24)];
+    result[out_index++] = codes[GET_BITS(temp, 12, 18)];
+    result[out_index++] = '=';
+    result[out_index++] = '=';
+  } else if (length % 3 == 2) {
+    result[out_index++] = codes[GET_BITS(temp, 18, 24)];
+    result[out_index++] = codes[GET_BITS(temp, 12, 18)];
+    result[out_index++] = codes[GET_BITS(temp, 6, 12)];
+    result[out_index++] = '=';
+  }
+  return result;
 }

+std::vector<uint8_t> Base64DecodeInternal(const char* encoded, size_t length,
+                                          const char* codes) {
+  const size_t out_size_max = (length * 3 + 3) / 4;  // safe for length == 0
+  std::vector<uint8_t> result(out_size_max, '\0');
+  // |temp| stores 24-bits of data that is treated as an array where insertions
+  // occur from high to low.
+  uint32_t temp = 0;
+  size_t out_index = 0;
+  size_t i;
+  for (i = 0; i < length; i++) {
+    if (encoded[i] == '=') {
+      // Verify an '=' only appears at the end.  We want i to remain at the
+      // first '=', so we need an inner loop.
+      for (size_t j = i; j < length; j++) {
+        if (encoded[j] != '=') {
+          LOGE("base64Decode failed");
+          return std::vector<uint8_t>();
+        }
+      }
+      if (length % 4 != 0) {
+        // If padded, then the length must be a multiple of 4.
+        // Unpadded messages are OK.
+        LOGE("base64Decode failed");
+        return std::vector<uint8_t>();
+      }
+      break;
+    }
+
+    const int decoded = DecodeBase64Char(encoded[i], codes);
+    if (decoded < 0 || decoded >= 64) {  // 64+ is '=' or the NUL terminator
+      LOGE("base64Decode failed");
+      return std::vector<uint8_t>();
+    }
+    // "insert" 6-bits of data
+    temp |= (decoded << ((3 - (i % 4)) * 6));
+
+    if (i % 4 == 3) {
+      result[out_index++] = GET_BITS(temp, 16, 24);
+      result[out_index++] = GET_BITS(temp, 8, 16);
+      result[out_index++] = GET_BITS(temp, 0, 8);
+      temp = 0;
+    }
+  }
+
+  switch (i % 4) {
+    case 1:
+      LOGE("base64Decode failed");
+      return std::vector<uint8_t>();
+    case 2:
+      result[out_index++] = GET_BITS(temp, 16, 24);
+      break;
+    case 3:
+      result[out_index++] = GET_BITS(temp, 16, 24);
+      result[out_index++] = GET_BITS(temp, 8, 16);
+      break;
+  }
+  result.resize(out_index);
+  return result;
+}
+}  // namespace
+
 // converts an ascii hex string(2 bytes per digit) into a decimal byte string
 std::vector<uint8_t> a2b_hex(const std::string& byte) {
  std::vector<uint8_t> array;
@@ -97,161 +205,7 @@ std::string b2a_hex(const std::string& byte) {
                   byte.length());
 }

-// Encode for standard base64 encoding (RFC4648).
-// https://en.wikipedia.org/wiki/Base64
-// Text    |       M        |       a       |       n        |
-// ASCI    |   77 (0x4d)    |   97 (0x61)   |   110 (0x6e)   |
-// Bits    | 0 1 0 0 1 1 0 1 0 1 1 0 0 0 0 1 0 1 1 0 1 1 1 0 |
-// Index   |     19     |     22    |      5    |     46     |
-// Base64  |      T     |      W    |      F    |      u     |
-//         | <-----------------  24-bits  -----------------> |
-std::string Base64Encode(const std::vector<uint8_t>& bin_input) {
-  if (bin_input.empty()) {
-    return std::string();
-  }
-
-  // |temp| stores a 24-bit block that is treated as an array where insertions
-  // occur from high to low.
-  uint32_t temp = 0;
-  size_t out_index = 0;
-  const size_t out_size = CEIL_DIVIDE(bin_input.size(), 3) * 4;
-  std::string result(out_size, '\0');
-  for (size_t i = 0; i < bin_input.size(); i++) {
-    // "insert" 8-bits of data
-    temp |= (bin_input[i] << ((2 - (i % 3)) * 8));
-
-    if (i % 3 == 2) {
-      result[out_index++] = kBase64Codes[GET_BITS(temp, 18, 24)];
-      result[out_index++] = kBase64Codes[GET_BITS(temp, 12, 18)];
-      result[out_index++] = kBase64Codes[GET_BITS(temp, 6, 12)];
-      result[out_index++] = kBase64Codes[GET_BITS(temp, 0, 6)];
-      temp = 0;
-    }
-  }
-
-  if (bin_input.size() % 3 == 1) {
-    result[out_index++] = kBase64Codes[GET_BITS(temp, 18, 24)];
-    result[out_index++] = kBase64Codes[GET_BITS(temp, 12, 18)];
-    result[out_index++] = '=';
-    result[out_index++] = '=';
-  } else if (bin_input.size() % 3 == 2) {
-    result[out_index++] = kBase64Codes[GET_BITS(temp, 18, 24)];
-    result[out_index++] = kBase64Codes[GET_BITS(temp, 12, 18)];
-    result[out_index++] = kBase64Codes[GET_BITS(temp, 6, 12)];
-    result[out_index++] = '=';
-  }
-
-  return result;
-}
-
-// Filename-friendly base64 encoding (RFC4648), commonly referred to
-// as Base64WebSafeEncode.
-//
-// This is the encoding required to interface with the provisioning server, as
-// well as for certain license server transactions.  It is also used for logging
-// certain strings. The difference between web safe encoding vs regular encoding
-// is that the web safe version replaces '+' with '-' and '/' with '_'.
-std::string Base64SafeEncode(const std::vector<uint8_t>& bin_input) {
-  if (bin_input.empty()) {
-    return std::string();
-  }
-
-  std::string ret = Base64Encode(bin_input);
-  for (size_t i = 0; i < ret.size(); i++) {
-    if (ret[i] == '+')
-      ret[i] = '-';
-    else if (ret[i] == '/')
-      ret[i] = '_';
-  }
-  return ret;
-}
-
-std::string Base64SafeEncodeNoPad(const std::vector<uint8_t>& bin_input) {
-  std::string b64_output = Base64SafeEncode(bin_input);
-  // Output size: ceiling [ bin_input.size() * 4 / 3 ].
-  b64_output.resize((bin_input.size() * 4 + 2) / 3);
-  return b64_output;
-}
-
-// Decode for standard base64 encoding (RFC4648).
-std::vector<uint8_t> Base64Decode(const std::string& b64_input) {
-  if (b64_input.empty()) {
-    return std::vector<uint8_t>();
-  }
-
-  const size_t out_size_max = CEIL_DIVIDE(b64_input.size() * 3, 4);
-  std::vector<uint8_t> result(out_size_max, '\0');
-
-  // |temp| stores 24-bits of data that is treated as an array where insertions
-  // occur from high to low.
-  uint32_t temp = 0;
-  size_t out_index = 0;
-  size_t i;
-  for (i = 0; i < b64_input.size(); i++) {
-    if (b64_input[i] == '=') {
-      // Verify an '=' only appears at the end.  We want i to remain at the
-      // first '=', so we need an inner loop.
-      for (size_t j = i; j < b64_input.size(); j++) {
-        if (b64_input[j] != '=') {
-          LOGE("base64Decode failed");
-          return std::vector<uint8_t>();
-        }
-      }
-      break;
-    }
-
-    const int decoded = DecodeBase64Char(b64_input[i]);
-    if (decoded < 0) {
-      LOGE("base64Decode failed");
-      return std::vector<uint8_t>();
-    }
-    // "insert" 6-bits of data
-    temp |= (decoded << ((3 - (i % 4)) * 6));
-
-    if (i % 4 == 3) {
-      result[out_index++] = GET_BITS(temp, 16, 24);
-      result[out_index++] = GET_BITS(temp, 8, 16);
-      result[out_index++] = GET_BITS(temp, 0, 8);
-      temp = 0;
-    }
-  }
-
-  switch (i % 4) {
-    case 1:
-      LOGE("base64Decode failed");
-      return std::vector<uint8_t>();
-    case 2:
-      result[out_index++] = GET_BITS(temp, 16, 24);
-      break;
-    case 3:
-      result[out_index++] = GET_BITS(temp, 16, 24);
-      result[out_index++] = GET_BITS(temp, 8, 16);
-      break;
-  }
-  result.resize(out_index);
-  return result;
-}
-
-// Decode for Filename-friendly base64 encoding (RFC4648), commonly referred
-// as Base64WebSafeDecode. Add padding if needed.
-std::vector<uint8_t> Base64SafeDecode(const std::string& b64_input) {
-  if (b64_input.empty()) {
-    return std::vector<uint8_t>();
-  }
-
-  // Make a copy so we can modify it to replace the web-safe special characters
-  // with the normal ones.
-  std::string input_copy = b64_input;
-  for (size_t i = 0; i < input_copy.size(); i++) {
-    if (input_copy[i] == '-')
-      input_copy[i] = '+';
-    else if (input_copy[i] == '_')
-      input_copy[i] = '/';
-  }
-  return Base64Decode(input_copy);
-}
-
-std::string HexEncode(const uint8_t* in_buffer, unsigned int size) {
+std::string HexEncode(const uint8_t* in_buffer, size_t size) {
  static const char kHexChars[] = "0123456789ABCDEF";
  if (size == 0) return "";
  constexpr unsigned int kMaxSafeSize = 3072;
@@ -267,19 +221,83 @@ std::string HexEncode(const uint8_t* in_buffer, unsigned int size) {
  return out_buffer;
 }

-std::string IntToString(int value) {
-  // log10(2) ~= 0.3 bytes needed per bit or per byte log10(2**8) ~= 2.4.
-  // So round up to allocate 3 output characters per byte, plus 1 for '-'.
-  const int kOutputBufSize = 3 * sizeof(int) + 1;
-  char buffer[kOutputBufSize];
-  memset(buffer, 0, kOutputBufSize);
-  snprintf(buffer, kOutputBufSize, "%d", value);
+// Standard Base64 encoding and decoding.

-  std::string out_string(buffer);
-  return out_string;
+std::string Base64Encode(const std::vector<uint8_t>& bin_input) {
+  if (bin_input.empty()) {
+    return std::string();
+  }
+  return Base64EncodeInternal(bin_input.data(), bin_input.size(), kBase64Codes);
 }

-int64_t htonll64(int64_t x) {  // Convert to big endian (network-byte-order)
+std::string Base64Encode(const std::string& bin_input) {
+  if (bin_input.empty()) {
+    return std::string();
+  }
+  return Base64EncodeInternal(
+      reinterpret_cast<const uint8_t*>(bin_input.data()), bin_input.size(),
+      kBase64Codes);
+}
+
+// Decode for standard base64 encoding (RFC4648).
+std::vector<uint8_t> Base64Decode(const std::string& b64_input) {
+  if (b64_input.empty()) {
+    return std::vector<uint8_t>();
+  }
+  return Base64DecodeInternal(b64_input.data(), b64_input.size(), kBase64Codes);
+}
+
+// URL/Filename Safe Base64 encoding and decoding.
+
+// This is the encoding required to interface with the provisioning server, as
+// well as for certain license server transactions.  It is also used for logging
+// certain strings. The difference between web safe encoding vs regular encoding
+// is that the web safe version replaces '+' with '-' and '/' with '_'.
+std::string Base64SafeEncode(const std::vector<uint8_t>& bin_input) {
+  if (bin_input.empty()) {
+    return std::string();
+  }
+  return Base64EncodeInternal(bin_input.data(), bin_input.size(),
+                              kBase64SafeCodes);
+}
+
+std::string Base64SafeEncode(const std::string& bin_input) {
+  if (bin_input.empty()) {
+    return std::string();
+  }
+  return Base64EncodeInternal(
+      reinterpret_cast<const uint8_t*>(bin_input.data()), bin_input.size(),
+      kBase64SafeCodes);
+}
+
+std::vector<uint8_t> Base64SafeDecode(const std::string& b64_input) {
+  if (b64_input.empty()) {
+    return std::vector<uint8_t>();
+  }
+  return Base64DecodeInternal(b64_input.data(), b64_input.size(),
+                              kBase64SafeCodes);
+}
+
+// URL/Filename Safe Base64 encoding without padding.
+
+std::string Base64SafeEncodeNoPad(const std::vector<uint8_t>& bin_input) {
+  std::string b64_output = Base64SafeEncode(bin_input);
+  // Output size: ceiling [ bin_input.size() * 4 / 3 ].
+  b64_output.resize((bin_input.size() * 4 + 2) / 3);
+  return b64_output;
+}
+
+std::string Base64SafeEncodeNoPad(const std::string& bin_input) {
+  std::string b64_output = Base64SafeEncode(bin_input);
+  // Output size: ceiling [ bin_input.size() * 4 / 3 ].
+  b64_output.resize((bin_input.size() * 4 + 2) / 3);
+  return b64_output;
+}
+
+// Host to Network/Network to Host conversion.
+
+// Convert to big endian (network-byte-order)
+int64_t htonll64(int64_t x) {
  union {
    uint32_t array[2];
    int64_t number;
@@ -296,19 +314,13 @@ int64_t htonll64(int64_t x) {  // Convert to big endian (network-byte-order)
  }
 }

-std::string BytesToString(const uint8_t* bytes, unsigned size) {
-  if (!bytes || !size) return "";
-  const char* char_bytes = reinterpret_cast<const char*>(bytes);
-  return std::string(char_bytes, char_bytes + size);
-}
-
 // Encode unsigned integer into a big endian formatted string
 std::string EncodeUint32(unsigned int u) {
  std::string s;
-  s.append(1, (u >> 24) & 0xFF);
-  s.append(1, (u >> 16) & 0xFF);
-  s.append(1, (u >> 8) & 0xFF);
-  s.append(1, (u >> 0) & 0xFF);
+  s.push_back((u >> 24) & 0xFF);
+  s.push_back((u >> 16) & 0xFF);
+  s.push_back((u >> 8) & 0xFF);
+  s.push_back(u & 0xFF);
  return s;
 }