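Actually, C++ is still faster (built with clang 3.8; the two numbers are nanoseconds per value for the double-to-string and string-to-double conversions, respectively):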
./a.out
285.573 317.542
Python:
660.446166992 543.710947037
C++ code (please keep in mind that most sane std::string implementations start with a built-in capacity of roughly 15 to 22 characters, which is enough to hold any "%g" result without allocating):
#include <chrono>
#include <cmath>
#include <cstdio>
#include <cstdlib>
#include <iostream>
#include <random>
#include <string>
#include <vector>
using namespace std;
/// Builds a reproducible vector of `n` pseudo-random doubles for the benchmark.
///
/// Each value is a mantissa drawn from a default uniform_real_distribution
/// (range [0, 1)) multiplied by 10 raised to an integer exponent drawn
/// uniformly from [-100, 100]. The engine is a default-constructed
/// std::mt19937, so every call starts from the same engine state.
///
/// @param n number of values to generate
/// @return vector holding exactly `n` generated values
static auto gen(std::size_t n) {
  std::vector<double> rez(n);
  std::uniform_int_distribution<int> rnd_exp(-100, 100);
  std::uniform_real_distribution<double> r01;
  std::mt19937 eng;
  for (auto &val : rez) {
    // Draw the two numbers in separate statements. Both draws advance the
    // same engine, and the operands of `*` may be evaluated in either order,
    // so doing both inside one expression makes the data compiler-dependent.
    const double mantissa = r01(eng);
    const int exponent = rnd_exp(eng);
    val = mantissa * std::pow(10.0, exponent);
  }
  return rez;
}
/// Formats `d` with printf's "%g" conversion into `s`, reusing the storage
/// that `s` already owns whenever the text fits.
///
/// On return `s` holds exactly the formatted characters, i.e. `s.size()`
/// equals the formatted length. If snprintf reports an encoding error,
/// `s` is left empty.
///
/// @param d value to format
/// @param s output string; its previous contents are overwritten
static void to_str(double d, std::string &s) {
  // Expose the whole current capacity as writable characters so the common
  // case needs no allocation.
  s.resize(s.capacity());

  // snprintf counts the terminating NUL in its size argument. A std::string
  // keeps a terminator slot right after its size() characters, so the usable
  // buffer is size() + 1 bytes.
  const int n = std::snprintf(&s[0], s.size() + 1, "%g", d);
  if (n < 0) {
    s.clear();
    return;
  }

  const auto len = static_cast<std::size_t>(n);
  if (len > s.size()) {
    // Output was truncated: grow to the exact length and format again.
    s.resize(len);
    std::snprintf(&s[0], len + 1, "%g", d);
  }

  // Drop the unused tail so size() reflects the formatted text only.
  s.resize(len);
}
static void test(vector<double> dvals) {
vector<string> svals(dvals.size());
auto t0 = chrono::system_clock::now();
for (size_t i = 0; i < dvals.size(); ++i) {
to_str(dvals[i], svals[i]);
}
auto t1 = chrono::system_clock::now();
vector<double> xvals(svals.size());
for (size_t i = 0; i < svals.size(); ++i) {
xvals[i] = atof(svals[i].c_str());
}
auto t2 = chrono::system_clock::now();
std::cout << 1e9 * chrono::duration<double>(t1 - t0).count() / dvals.size()
<< ' '
<< 1e9 * chrono::duration<double>(t2 - t1).count() / dvals.size()
<< '\n';
}
// Entry point: generates one million sample values and runs the benchmark on them.
int main() {
  constexpr std::size_t kSampleCount = 1000000;
  test(gen(kSampleCount));
  return 0;
}