// Adapted from: https://msdn.microsoft.com/en-us/library/hh265136.aspx
#include <amp.h>
#include <amp_math.h>
#include <iostream>
using namespace concurrency;
// Number of elements in the host arrays and array_views used by CppAmpMethod().
const int size = 5;
// C++AMP样例
void CppAmpMethod()
{
int aCPP[] = { 1, 2, 3, 4, 5 };
int bCPP[] = { 6, 7, 8, 9, 10 };
int sumCPP[size];
// Create C++ AMP objects.
array_view<const int, 1> a(size, aCPP);
array_view<const int, 1> b(size, bCPP);
array_view<int, 1> sum(size, sumCPP);
sum.discard_data();
parallel_for_each(
// Define the compute domain, which is the set of threads that are created
sum.extent,
// Define the code to run on each thread on the accelerator
[=](index<1> idx) restrict(amp)
{
sum[idx] = a[idx] + b[idx];
}
);
// print the results. The expected output is "7, 9, 11, 13, 15"
for (int i = 0; i < size; i++)
{
std::cout << sum[i] << "\n";
}
}
// array_view用法范例1
void index1()
{
int aCPP[] = { 1, 2, 3, 4, 5 };
array_view<int, 1> a(5, aCPP);
index<1> idx(2);
std::cout << a[idx] << "\n";
// Output: 3
}
// array_view用法范例2
void index2()
{
int aCPP[] = { 1, 2, 3,
4, 5, 6 };
array_view<int, 2> a(2, 3, aCPP);
index<2> idx(1, 2);
std::cout << a[idx] << "\n";
// Output: 6
}
// array_view用法范例3
void index3()
{
int aCPP[] = {
1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12,
1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12 };
array_view<int, 3> a(2, 3, 4, aCPP);
// Specifies the element at 3, 1, 0
index<3> idx(0, 1, 3);
std::cout << a[idx] << "\n";
// Output: 8
}
// extent用法范例1
void extent1()
{
int aCPP[] = {
1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12,
1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12 };
// There are 3 rows and 4 columns, and the depth is two.
array_view<int, 3> a(2, 3, 4, aCPP);
std::cout << "The number of colmns is " << a.extent[2] << "\n";
std::cout << "The number of rows is " << a.extent[1] << "\n";
std::cout << "The depth is " << a.extent[0] << "\n";
std::cout << "Length in most significant dimension is " << a.extent[0] << "\n";
}
// extent用法范例2
void extent2()
{
int aCPP[] = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24 };
extent<3> e(2, 3, 4);
array_view<int, 3> a(e, aCPP);
std::cout << "The num of columns is " << a.extent[2] << "\n";
std::cout << "The num of rows is " << a.extent[1] << "\n";
std::cout << "The depth is " << a.extent[0] << "\n";
}
// araay范例
void array1()
{
std::vector<int> data(5);
for (int count = 0; count < 5; count++)
{
data[count] = count;
}
array<int, 1> a(5, data.begin(), data.end());
parallel_for_each(
a.extent,
[=, &a](index<1> idx) restrict(amp)
{
a[idx] = a[idx] * 10;
}
);
data = a;
for (int i = 0; i < 5; i++)
{
std::cout << data[i] << "\n";
}
}
// 和cpu共享内存
void shareMemory1()
{
accelerator acc = accelerator(accelerator::default_accelerator);
// Early out if the defult accelerator doesn‘t support shared memory.
if (!acc.supports_cpu_shared_memory)
{
std::cout << "The defult acclerator does not support shared memory " << std::endl;
return;
}
// Override the default CPU access type.
//acc.default_cpu_access_type = access_type_read_write;
// Create an accelerator_view from the default accelerator.
// The accelerator_view inherits its default_cpu_access_type from acc.
accelerator_view acc_v = acc.default_view;
// Create an extent object to size the arrays.
extent<1> ex(10);
// Input array that can be written on the CPU.
array<int, 1> arr_w(ex, acc_v, access_type_write);
// Output array that can be read on the CPU
array<int, 1> arr_r(ex, acc_v, access_type_read);
// Read-write array that can be both written to and read from on the CPU.
array<int, 1> arr_rm(ex, acc_v, access_type_read_write);
}
// parallel_for_each用法范例1
void AddArrays()
{
int aCPP[] = { 1, 2, 3, 4, 5 };
int bCPP[] = { 6, 7, 8, 9, 10 };
int sumCPP[] = { 0, 0, 0, 0, 0 };
array_view<int, 1> a(5, aCPP);
array_view<int, 1> b(5, bCPP);
array_view<int, 1> sum(5, sumCPP);
parallel_for_each(
sum.extent,
[=](index<1> idx) restrict(amp)
{
sum[idx] = a[idx] + b[idx];
}
);
for (int i = 0; i < 5; i++)
{
std::cout << sum[i] << "\n";
}
}
// Kernel helper (restrict(amp)): stores a[idx] + b[idx] into sum[idx].
void AddElements(index<1> idx, array_view<int, 1> sum, array_view<int, 1> a, array_view<int, 1> b) restrict(amp)
{
    const int lhs = a[idx];
    const int rhs = b[idx];
    sum[idx] = lhs + rhs;
}
// parallel_for_each用法范例2
void AddArraysWitchFunction()
{
int aCPP[] = { 1, 2, 3, 4, 5 };
int bCPP[] = { 6, 7, 8, 9, 10 };
int sumCPP[] = { 0, 0, 0, 0, 0 };
array_view<int, 1> a(5, aCPP);
array_view<int, 1> b(5, bCPP);
array_view<int, 1> sum(5, sumCPP);
parallel_for_each(
sum.extent,
[=](index<1> idx) restrict(amp)
{
AddElements(idx, sum, a, b);
}
);
for (int i = 0; i < 5; i++)
{
std::cout << sum[i] << "\n";
}
}
// 二维分割切块加速
void acceleratingCode()
{
// Sample data:
int sampledata[] = {
2, 2, 9, 7, 1, 4,
4, 4, 8, 8, 3, 4,
1, 5, 1, 2, 5, 2,
6, 8, 3, 2, 7, 2
};
// The tiles:
// 2 2 9 7 1 4
// 4 4 8 8 3 4
//
// 1 5 1 2 5 2
// 6 8 3 2 7 2
// Averages:
int averagedata[] = {
0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0,
};
array_view<int, 2> sample(4, 6, sampledata);
array_view<int, 2> average(4, 6, averagedata);
parallel_for_each(
// Create threads for sample.extent and divide the extent into 2 x 2 tiles
sample.extent.tile<2, 2>(),
[=](tiled_index<2, 2> idx) restrict(amp)
{
// Create a 2 x 2 array to hold the values in this tile.
tile_static int nums[2][2];
// Copy the values for the tile into the 2 x 2 array.
nums[idx.local[1]][idx.local[0]] = sample[idx.global];
// When all the threads have executed and the 2 x 2 array is complete, find the average.
idx.barrier.wait();
int sum = nums[0][0] + nums[0][1] + nums[1][0] + nums[1][1];
// Copy the average into the array_view.
average[idx.global] = sum / 4;
}
);
for (int i = 0; i < 4; i++)
{
for (int j = 0; j < 6; j++)
{
std::cout << average(i, j) << " ";
}
std::cout << "\n";
}
// Output
// 3 3 8 8 3 3
// 3 3 8 8 3 3
// 5 5 2 2 4 4
// 5 5 2 2 4 4
}
// parallel_for_each usage example 3: using the accelerator math library.
// Replaces each value with its base-10 logarithm in place and prints the
// results, one per line.
void MathExample()
{
    double values[] = { 1.0, 10.0, 60.0, 100.0, 600.0, 1000.0 };
    array_view<double, 1> logs(6, values);

    // NOTE(review): the data is double but the call goes through fast_math;
    // presumably the argument is converted to float -- confirm against the
    // C++ AMP math library documentation.
    parallel_for_each(logs.extent, [=](index<1> pos) restrict(amp)
    {
        logs[pos] = concurrency::fast_math::log10(logs[pos]);
    });

    for (int k = 0; k != 6; ++k)
    {
        std::cout << logs[k] << "\n";
    }
}
// Entry point: runs the basic C++ AMP sample. The remaining samples are kept
// as commented-out calls; uncomment the ones to run.
// Returns 0 so the process reports success (a non-zero exit status is
// conventionally treated as failure by shells and build tools).
int main()
{
    CppAmpMethod();
    //index1();
    //index2();
    //index3();
    //extent1();
    //extent2();
    //array1();
    //shareMemory1();
    //AddArrays();
    //AddArraysWitchFunction();
    //acceleratingCode();
    //MathExample();
    return 0;
}
// Original article: http://www.cnblogs.com/WuhanLiukai/p/4545453.html