The day Python beats the crap out of C++: 450 ns against ~700 ns from iostreams.

Actually C++ is still faster (clang 3.8):

./a.out
285.573 317.542

Python:

660.446166992 543.710947037

C++ code (please keep in mind that most sane std::string implementations have an initial capacity of at least 22 characters):

#include <chrono>
#include <cmath>
#include <cstdio>
#include <cstdlib>
#include <iostream>
#include <random>
#include <string>
#include <vector>

using namespace std;

// Builds a vector of `n` pseudo-random doubles covering a wide range of
// magnitudes: each value is a uniform mantissa from the default
// uniform_real_distribution scaled by 10^e, with e drawn uniformly from
// [-100, 100].
//
// The engine is default-constructed (fixed seed), so every call produces the
// same sequence and a shorter result is a prefix of a longer one.
static auto gen(size_t n) {
  std::vector<double> rez(n);
  std::uniform_int_distribution<int> rnd_exp(-100, 100);
  std::uniform_real_distribution<double> r01;
  std::mt19937 eng;

  for (auto &val : rez) {
    // Draw in separate statements: both distributions advance the same
    // engine, and the operand evaluation order of `*` is unspecified, so a
    // single expression could yield different sequences per compiler.
    const double mantissa = r01(eng);
    const int exponent = rnd_exp(eng);
    val = mantissa * std::pow(10.0, exponent);
  }

  return rez;
}

// Formats `d` with printf's "%g" into `s`, reusing the storage `s` already
// owns so that short results need no allocation.
//
// On return `s` contains exactly the formatted characters, i.e. s.size() is
// the formatted length. If snprintf reports an error, `s` is left empty.
static void to_str(double d, std::string &s) {
  // Expose all existing capacity so the first attempt can format in place.
  const std::size_t room = s.capacity();
  s.resize(room);

  // The buffer offers room + 1 writable bytes: `room` characters plus the
  // terminator slot std::string keeps right after its last character.
  const int n = std::snprintf(&s[0], room + 1, "%g", d);
  if (n < 0) {  // snprintf signalled an encoding error
    s.clear();
    return;
  }

  // Trim to the real length, or grow if the first attempt was truncated.
  const std::size_t len = static_cast<std::size_t>(n);
  s.resize(len);
  if (len > room) {
    std::snprintf(&s[0], len + 1, "%g", d);
  }
}

// Benchmarks the double -> string -> double round trip over `dvals`.
//
// Phase 1 formats every value with to_str(); phase 2 parses every resulting
// string back with atof(). Prints one line to stdout with two numbers: the
// average nanoseconds per value for formatting, then for parsing.
static void test(vector<double> dvals) {
  // steady_clock is monotonic; a wall clock such as system_clock can be
  // adjusted while the benchmark runs and distort the measured intervals.
  using bench_clock = std::chrono::steady_clock;

  std::vector<std::string> svals(dvals.size());
  const auto t0 = bench_clock::now();
  for (size_t i = 0; i < dvals.size(); ++i) {
    to_str(dvals[i], svals[i]);
  }
  const auto t1 = bench_clock::now();

  // Allocated between the two timestamps, as before, so it is charged to the
  // parsing phase.
  std::vector<double> xvals(svals.size());
  for (size_t i = 0; i < svals.size(); ++i) {
    xvals[i] = atof(svals[i].c_str());
  }
  const auto t2 = bench_clock::now();

  // duration<double> counts seconds; scale to nanoseconds per element.
  std::cout << 1e9 * std::chrono::duration<double>(t1 - t0).count() / dvals.size()
            << ' '
            << 1e9 * std::chrono::duration<double>(t2 - t1).count() / dvals.size()
            << '\n';
}

// Entry point: generates one million sample values and runs the benchmark.
int main() {
  const size_t sample_count = 1000000;
  test(gen(sample_count));
}
/r/cpp Thread Link - rextester.com