Rheolef 7.1
an efficient C++ finite element environment
csr_mpi.cc
# include "rheolef/config.h"

#ifdef _RHEOLEF_HAVE_MPI
#include "rheolef/dis_macros.h"
#include "rheolef/csr.h"
#include "rheolef/asr_to_csr.h"
#include "rheolef/asr_to_csr_dist_logical.h"

#include "rheolef/csr_amux.h"
#include "rheolef/csr_cumul_trans_mult.h"
#include "rheolef/mpi_scatter_init.h"
#include "rheolef/mpi_scatter_begin.h"
#include "rheolef/mpi_scatter_end.h"

using namespace std;
namespace rheolef {
// ----------------------------------------------------------------------------
// useful class-functions
// ----------------------------------------------------------------------------
#include <algorithm> // for lower_bound
template <class Pair1, class Pair2, class RandomIterator>
struct op_ext2glob_t : unary_function<Pair1,Pair2> {
  Pair2 operator()(const Pair1& x) const {
    return Pair2(t[x.first], x.second); }
  op_ext2glob_t(RandomIterator t1) : t(t1) {}
  RandomIterator t;
};
template <class Pair1, class Pair2>
struct op_dia_t : unary_function<Pair1,Pair2> {
  Pair2 operator()(const Pair1& x) const {
    return Pair2(shift + x.first, x.second); }
  typedef typename Pair1::first_type Size;
  op_dia_t(Size s) : shift(s) {}
  Size shift;
};
template <class Pair1, class Pair2, class RandomIterator>
struct op_dis_j2jext_t : unary_function<Pair1,Pair2> {
  Pair2 operator()(const Pair1& x) const {
    RandomIterator t = std::lower_bound(t1, t2, x.first);
    assert_macro(*t == x.first, "problem in distributed asr_to_csr");
    return Pair2(distance(t1,t), x.second); }
  op_dis_j2jext_t(RandomIterator u1, RandomIterator u2) : t1(u1), t2(u2) {}
  RandomIterator t1, t2;
};
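// ----------------------------------------------------------------------------
// usage sketch (not part of the library): op_dis_j2jext_t maps a global
// (distributed) column index back to its compressed "external" index by
// binary search in the sorted jext2dis_j table. Values below are
// hypothetical, for illustration only:
// ----------------------------------------------------------------------------
//   #include <vector>
//   #include <algorithm>
//   #include <cassert>
//   std::vector<std::size_t> jext2dis_j = {3, 7, 42}; // sorted global indexes
//   std::vector<std::size_t>::const_iterator t
//     = std::lower_bound (jext2dis_j.begin(), jext2dis_j.end(), std::size_t(7));
//   assert (t != jext2dis_j.end() && *t == 7);
//   std::size_t jext = std::distance (jext2dis_j.begin(), t); // jext == 1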
// ----------------------------------------------------------------------------
// allocators
// ----------------------------------------------------------------------------
template<class T>
csr_rep<T,distributed>::csr_rep ()
  : base(),
    _ext(),
    _jext2dis_j(0),
    _dis_nnz(0),
    _dis_ext_nnz(0),
    _scatter_initialized(false),
    _from(),
    _to(),
    _buffer()
{
}
template<class T>
csr_rep<T,distributed>::csr_rep (const csr_rep<T,distributed>& a)
  : base(a),
    _ext(a._ext),
    _jext2dis_j(a._jext2dis_j),
    _dis_nnz(a._dis_nnz),
    _dis_ext_nnz(a._dis_ext_nnz),
    _scatter_initialized(a._scatter_initialized),
    _from(a._from),
    _to(a._to),
    _buffer(a._buffer)
{
  // physical copy of csr
}
template<class T>
void
csr_rep<T,distributed>::resize (const distributor& row_ownership, const distributor& col_ownership, size_type nnz1)
{
  base::resize (row_ownership, col_ownership, nnz1);
  _ext.resize (row_ownership, col_ownership, 0); // note: the _ext part will be resized elsewhere
  _scatter_initialized = false;
}
template<class T>
template<class A>
void
csr_rep<T,distributed>::build_from_asr (const asr<T,distributed,A>& a)
{
  base::resize (a.row_ownership(), a.col_ownership(), 0);
  _ext.resize (a.row_ownership(), a.col_ownership(), 0);

  typedef typename asr<T>::row_type row_type;
  typedef typename row_type::value_type const_pair_type;
  typedef pair<size_type,T> pair_type;
  is_dia_t<size_type, const_pair_type>
    is_dia(col_ownership().first_index(), col_ownership().last_index());
  //
  // step 1: compute pointers
  //
  set<size_type> colext;

  size_type nnzext
    = asr_to_csr_dist_logical (a.begin(), a.end(), is_dia, colext);
  //
  // step 2: resize and copy jext2dis_j
  //
  size_type nnzdia = a.nnz() - nnzext;
  size_type ncoldia = col_ownership().size();
  size_type ncolext = colext.size();
  base::resize(a.row_ownership(), a.col_ownership(), nnzdia);
  _ext.resize (a.row_ownership(), a.col_ownership(), nnzext);
  _jext2dis_j.resize (ncolext);
  copy (colext.begin(), colext.end(), _jext2dis_j.begin());
  //
  // step 3: copy values
  //   column indexes of a   in 0..dis_ncol
  //                  of dia in 0..ncoldia
  //                  of ext in 0..ncolext
  op_dia_t<const_pair_type,pair_type> op_dia(- col_ownership().first_index());
  // step 3.a: copy dia part
  asr_to_csr (
    a.begin(),
    a.end(),
    is_dia,
    op_dia,
    base::begin(),
    base::_data.begin());

  // step 3.b: copy ext part
  unary_negate<is_dia_t<size_type, const_pair_type> >
    is_ext(is_dia);
  op_dis_j2jext_t<const_pair_type, pair_type, typename vector<size_type>::const_iterator>
    op_dis_j2jext(_jext2dis_j.begin(), _jext2dis_j.end());
  asr_to_csr (
    a.begin(), a.end(), is_ext, op_dis_j2jext,
    _ext.begin(), _ext._data.begin());

  // compute _dis_nnz:
  _dis_nnz = a.dis_nnz();
  _dis_ext_nnz = mpi::all_reduce (comm(), _ext.nnz(), std::plus<size_type>());
  _scatter_initialized = false;
#ifdef TO_CLEAN
  _dis_nnz = mpi::all_reduce (comm(), nnz() + _ext.nnz(), std::plus<size_type>());
  check_macro (_dis_nnz == a.dis_nnz(), "build_from_asr: mistake: asr::dis_nnz="<<a.dis_nnz()
    << " while _dis_nnz="<<_dis_nnz);
#endif // TO_CLEAN
}
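// ----------------------------------------------------------------------------
// illustration (not from the library): splitting one local row into dia/ext.
// With a column ownership range [first, last) = [4, 8), an assembled row
// {(2,a),(5,b),(9,c)} yields dia = {(1,b)} (index shifted by -first) and
// ext = {(0,a),(1,c)} with jext2dis_j = {2, 9}. A minimal sketch:
// ----------------------------------------------------------------------------
//   #include <vector>
//   #include <utility>
//   #include <algorithm>
//   typedef std::pair<std::size_t,double> entry;
//   std::vector<entry> row = {{2,1.0},{5,2.0},{9,3.0}}, dia, ext;
//   std::vector<std::size_t> jext2dis_j = {2, 9}; // sorted external columns
//   std::size_t first = 4, last = 8;
//   for (const entry& e : row) {
//     if (first <= e.first && e.first < last)
//       dia.push_back (entry(e.first - first, e.second));
//     else {
//       std::size_t jext = std::lower_bound (jext2dis_j.begin(),
//         jext2dis_j.end(), e.first) - jext2dis_j.begin();
//       ext.push_back (entry(jext, e.second));
//     }
//   }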
// messages building for A*x and trans(A)*x
template<class T>
void
csr_rep<T,distributed>::_scatter_init () const
{
  vector<size_type> id (_jext2dis_j.size());
  for (size_type i = 0, n = id.size(); i < n; i++) id[i] = i;

  distributor::tag_type tag = distributor::get_new_tag();

  mpi_scatter_init (
    _jext2dis_j.size(),
    _jext2dis_j.begin().operator->(),
    id.size(),
    id.begin().operator->(),
    dis_ncol(),
    col_ownership().begin().operator->(),
    tag,
    row_ownership().comm(),
    _from,
    _to);

  _buffer.resize(_jext2dis_j.size());
}
// ----------------------------------------------------------------------------
// io
// ----------------------------------------------------------------------------
template<class T>
odiststream&
csr_rep<T,distributed>::put (odiststream& ops) const
{
  // put all on io_proc
  size_type io_proc = odiststream::io_proc();
  size_type my_proc = comm().rank();
  distributor a_row_ownership (dis_nrow(), comm(), (my_proc == io_proc ? dis_nrow() : 0));
  distributor a_col_ownership (dis_ncol(), comm(), (my_proc == io_proc ? dis_ncol() : 0));
  typedef std::allocator<T> A; // TODO: use heap_alloc for asr
  asr<T,distributed,A> a (a_row_ownership, a_col_ownership);
  size_type first_dis_i = row_ownership().first_index();
  size_type first_dis_j = col_ownership().first_index();
  if (nnz() != 0) {
    const_iterator ia = begin();
    for (size_type i = 0; i < nrow(); i++) {
      for (const_data_iterator p = ia[i]; p < ia[i+1]; p++) {
        const size_type& j = (*p).first;
        const T& val = (*p).second;
        a.dis_entry (i+first_dis_i, j+first_dis_j) += val;
      }
    }
  }
  if (_ext.nnz() != 0) {
    const_iterator ext_ia = _ext.begin();
    for (size_type i = 0; i < nrow(); i++) {
      for (const_data_iterator p = ext_ia[i]; p < ext_ia[i+1]; p++) {
        const size_type& j = (*p).first;
        const T& val = (*p).second;
        a.dis_entry (i+first_dis_i, _jext2dis_j[j]) += val;
      }
    }
  }
  a.dis_entry_assembly();
  if (my_proc == io_proc) {
    a.put_seq (ops);
  }
  return ops;
}
template<class T>
void
csr_rep<T,distributed>::dump (const string& name) const
{
  odiststream ops;
  std::string filename = name + itos(comm().rank());
  ops.open (filename, "mtx", comm());
  check_macro(ops.good(), "\"" << filename << "[.mtx]\" cannot be created.");
  ops << "%%MatrixMarket matrix coordinate real general" << std::endl
      << dis_nrow() << " " << dis_ncol() << " " << dis_nnz() << std::endl;
  put(ops);
}
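// ----------------------------------------------------------------------------
// illustration (not from the library): the MatrixMarket coordinate format
// written by dump() lists one (row, column, value) triplet per line, with
// 1-based indexes. For the 2x2 matrix [[4,0],[1,5]] the file would read:
// ----------------------------------------------------------------------------
//   %%MatrixMarket matrix coordinate real general
//   2 2 3
//   1 1 4.0
//   2 1 1.0
//   2 2 5.0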
// ----------------------------------------------------------------------------
// blas2: basic linear algebra
// ----------------------------------------------------------------------------
template<class T>
void
csr_rep<T,distributed>::mult (
  const vec<T,distributed>& x,
        vec<T,distributed>& y)
  const
{
  check_macro (x.size() == ncol(), "csr*vec: incompatible csr("<<nrow()<<","<<ncol()<<") and vec("<<x.size()<<")");
  y.resize (row_ownership());

  distributor::tag_type tag = distributor::get_new_tag();

  // initialize _from and _to scatter messages
  _scatter_init_guard();

  // send x to others
  mpi_scatter_begin (
    x.begin().operator->(),
    _buffer.begin().operator->(),
    _from,
    _to,
    set_op<T,T>(),
    tag,
    row_ownership().comm());

  // y := dia*x
  csr_amux (
    base::begin(),
    base::end(),
    x.begin(),
    set_op<T,T>(),
    y.begin());

  // receive tmp from others
  mpi_scatter_end (
    x.begin(),
    _buffer.begin(),
    _from,
    _to,
    set_op<T,T>(),
    tag,
    row_ownership().comm());

  // y += ext*tmp
  csr_amux (_ext.begin(), _ext.end(), _buffer.begin(), set_add_op<T,T>(), y.begin());
}
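// ----------------------------------------------------------------------------
// usage sketch (not from the library): mult() overlaps communication and
// computation: post the scatter, multiply the local dia block while messages
// are in flight, complete the scatter, then accumulate the ext block.
// From user code the product is simply (assumed user-level rheolef API):
// ----------------------------------------------------------------------------
//   csr<Float> a;                        // distributed matrix, assembled
//   vec<Float> x (a.col_ownership(), 1.0);
//   vec<Float> y = a*x;                  // calls csr_rep<T,distributed>::mult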
template<class T>
void
csr_rep<T,distributed>::trans_mult (
  const vec<T,distributed>& x,
        vec<T,distributed>& y)
  const
{
  check_macro (x.size() == nrow(), "csr.trans_mult(vec): incompatible csr("<<nrow()<<","<<ncol()<<") and vec("<<x.size()<<")");

  // initialize _from and _to scatter messages
  _scatter_init_guard();

  y.resize (col_ownership());

  // y = dia*x
  std::fill (y.begin(), y.end(), T(0));
  csr_cumul_trans_mult (
    base::begin(),
    base::end(),
    x.begin(),
    set_add_op<T,T>(),
    y.begin());

  // buffer = ext*x
  std::fill (_buffer.begin(), _buffer.end(), T(0));
  csr_cumul_trans_mult (
    _ext.begin(),
    _ext.end(),
    x.begin(),
    set_add_op<T,T>(),
    _buffer.begin());

  // send buffer to the other parts of y (+=)
  distributor::tag_type tag = distributor::get_new_tag();
  mpi_scatter_begin (
    _buffer.begin().operator->(),
    y.begin().operator->(),
    _to, // reverse mode
    _from,
    set_add_op<T,T>(), // +=
    tag,
    col_ownership().comm());

  // receive buffer from others
  mpi_scatter_end (
    _buffer.begin(),
    y.begin(),
    _to, // reverse mode
    _from,
    set_add_op<T,T>(), // +=
    tag,
    col_ownership().comm());
}
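// ----------------------------------------------------------------------------
// note (not from the library): trans(A)*x reverses the scatter of A*x by
// swapping the _from/_to message patterns and accumulating with += on the
// owner, since several processes may contribute to the same entry of y.
// A minimal sketch of owner-side accumulation, with hypothetical values:
// ----------------------------------------------------------------------------
//   #include <vector>
//   #include <cstddef>
//   std::vector<double> y (4, 0.0);            // locally owned part of y
//   std::size_t recv_index[] = {1, 1, 3};      // from two other processes
//   double      recv_value[] = {0.5, 0.25, 1.0};
//   for (std::size_t k = 0; k < 3; k++)
//     y[recv_index[k]] += recv_value[k];       // += merges contributions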
// ----------------------------------------------------------------------------
// blas3: basic linear algebra
// ----------------------------------------------------------------------------
template<class T>
csr_rep<T,distributed>&
csr_rep<T,distributed>::operator*= (const T& lambda)
{
  base::operator*= (lambda);
  _ext.operator*= (lambda);
  return *this;
}
// ----------------------------------------------------------------------------
// expression c=a+b and c=a-b with a temporary c=*this
// ----------------------------------------------------------------------------
// NOTE: this algorithm could also serve the diagonal case (in csr_seq.cc),
// provided that pseudo-renumberings are introduced and the set.insert is
// removed from the first pass. Not necessarily more readable, though...
template<class T, class BinaryOp>
void
csr_ext_add (
  const csr_rep<T,sequential>& a, const std::vector<typename csr<T>::size_type>& jext_a2dis_j,
  const csr_rep<T,sequential>& b, const std::vector<typename csr<T>::size_type>& jext_b2dis_j,
        csr_rep<T,sequential>& c, std::vector<typename csr<T>::size_type>& jext_c2dis_j,
  BinaryOp binop)
{
  typedef typename csr<T>::size_type size_type;
  typedef typename csr_rep<T,distributed>::iterator iterator;
  typedef typename csr_rep<T,distributed>::const_iterator const_iterator;
  typedef typename csr_rep<T,distributed>::data_iterator data_iterator;
  typedef typename csr_rep<T,distributed>::const_data_iterator const_data_iterator;
  typedef std::pair<size_type,T> pair_type;
  //
  // first pass: compute nnz_c and resize
  //
  size_type nnz_ext_c = 0;
  const size_type infty = std::numeric_limits<size_type>::max();
  const_iterator ia = a.begin();
  const_iterator ib = b.begin();
  std::set<size_type> jext_c_set;
  for (size_type i = 0, n = a.nrow(); i < n; i++) {
    for (const_data_iterator iter_jva = ia[i], last_jva = ia[i+1],
                             iter_jvb = ib[i], last_jvb = ib[i+1];
         iter_jva != last_jva || iter_jvb != last_jvb; ) {

      size_type dis_ja = iter_jva == last_jva ? infty : jext_a2dis_j [(*iter_jva).first];
      size_type dis_jb = iter_jvb == last_jvb ? infty : jext_b2dis_j [(*iter_jvb).first];
      if (dis_ja == dis_jb) {
        jext_c_set.insert (dis_ja);
        iter_jva++;
        iter_jvb++;
      } else if (dis_ja < dis_jb) {
        jext_c_set.insert (dis_ja);
        iter_jva++;
      } else {
        jext_c_set.insert (dis_jb);
        iter_jvb++;
      }
      nnz_ext_c++;
    }
  }
  c.resize (a.nrow(), b.ncol(), nnz_ext_c);
  jext_c2dis_j.resize (jext_c_set.size());
  std::copy (jext_c_set.begin(), jext_c_set.end(), jext_c2dis_j.begin());
  //
  // second pass: add and store in c
  //
  op_dis_j2jext_t<pair_type, pair_type, typename vector<size_type>::const_iterator>
    op_dis_j2jext_c (jext_c2dis_j.begin(), jext_c2dis_j.end());
  data_iterator iter_jvc = c._data.begin().operator->();
  iterator ic = c.begin();
  *ic++ = iter_jvc;
  for (size_type i = 0, n = a.nrow(); i < n; i++) {
    for (const_data_iterator iter_jva = ia[i], last_jva = ia[i+1],
                             iter_jvb = ib[i], last_jvb = ib[i+1];
         iter_jva != last_jva || iter_jvb != last_jvb; ) {

      size_type dis_ja = iter_jva == last_jva ? infty : jext_a2dis_j [(*iter_jva).first];
      size_type dis_jb = iter_jvb == last_jvb ? infty : jext_b2dis_j [(*iter_jvb).first];
      if (dis_ja == dis_jb) {
        *iter_jvc++ = op_dis_j2jext_c (pair_type(dis_ja, binop((*iter_jva).second, (*iter_jvb).second)));
        iter_jva++;
        iter_jvb++;
      } else if (dis_ja < dis_jb) {
        *iter_jvc++ = op_dis_j2jext_c (pair_type(dis_ja, binop((*iter_jva).second, T(0))));
        iter_jva++;
      } else {
        *iter_jvc++ = op_dis_j2jext_c (pair_type(dis_jb, binop(T(0),(*iter_jvb).second)));
        iter_jvb++;
      }
    }
    *ic++ = iter_jvc;
  }
}
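// ----------------------------------------------------------------------------
// illustration (not from the library): the core of csr_ext_add is a merge of
// two rows sorted by global column index, applying binop entrywise and
// padding the missing side with T(0). A self-contained sketch, specialized
// here to binop = addition:
// ----------------------------------------------------------------------------
//   #include <vector>
//   #include <utility>
//   #include <limits>
//   typedef std::pair<std::size_t,double> entry;
//   std::vector<entry> merge_rows (const std::vector<entry>& a,
//                                  const std::vector<entry>& b) {
//     const std::size_t infty = std::numeric_limits<std::size_t>::max();
//     std::vector<entry> c;
//     std::size_t ka = 0, kb = 0;
//     while (ka < a.size() || kb < b.size()) {
//       std::size_t ja = (ka == a.size()) ? infty : a[ka].first;
//       std::size_t jb = (kb == b.size()) ? infty : b[kb].first;
//       if      (ja == jb) { c.push_back (entry(ja, a[ka].second + b[kb].second)); ka++; kb++; }
//       else if (ja <  jb) { c.push_back (entry(ja, a[ka].second)); ka++; }
//       else               { c.push_back (entry(jb, b[kb].second)); kb++; }
//     }
//     return c;
//   }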
template<class T>
template<class BinaryOp>
void
csr_rep<T,distributed>::assign_add (
  const csr_rep<T,distributed>& a,
  const csr_rep<T,distributed>& b,
  BinaryOp binop)
{
  check_macro (a.dis_nrow() == b.dis_nrow() && a.dis_ncol() == b.dis_ncol(),
    "a+b: invalid matrix a("<<a.dis_nrow()<<","<<a.dis_ncol()<<") and b("
    <<b.dis_nrow()<<","<<b.dis_ncol()<<")");
  check_macro (a.nrow() == b.nrow() && a.ncol() == b.ncol(),
    "a+b: matrix local distribution mismatch: a("<<a.nrow()<<","<<a.ncol()<<") and b("
    <<b.nrow()<<","<<b.ncol()<<")");

  // 1) the diagonal part:
  base::assign_add (a, b, binop);

  // 2) the extra-diagonal part:
  csr_ext_add (
    a._ext, a._jext2dis_j,
    b._ext, b._jext2dis_j,
    _ext, _jext2dis_j,
    binop);

  _dis_nnz = mpi::all_reduce (comm(), nnz() + _ext.nnz(), std::plus<size_type>());
  _dis_ext_nnz = mpi::all_reduce (comm(), _ext.nnz(), std::plus<size_type>());
  _scatter_initialized = false;

#ifdef TO_CLEAN
  // 3) scatter init for a*x :
  vector<size_type> id(_jext2dis_j.size());
  for (size_type i = 0; i < id.size(); i++) id[i] = i;
  distributor::tag_type tag = distributor::get_new_tag();

  _buffer.resize (_jext2dis_j.size());
  mpi_scatter_init (
    _jext2dis_j.size(),
    _jext2dis_j.begin().operator->(),
    id.size(),
    id.begin().operator->(),
    dis_ncol(),
    col_ownership().begin().operator->(),
    tag,
    row_ownership().comm(),
    _from,
    _to);
#endif // TO_CLEAN
}
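// ----------------------------------------------------------------------------
// usage sketch (not from the library): assign_add backs the distributed
// expressions c = a + b and c = a - b, with std::plus<T> or std::minus<T> as
// binop (see the explicit instantiations at the end of this file). The dia
// parts are added locally and the ext parts merged by csr_ext_add; only the
// nnz counters need a reduction. Assumed user-level rheolef API:
// ----------------------------------------------------------------------------
//   csr<Float> a, b;         // same distributed sizes and row distribution
//   csr<Float> c = a + b;
//   csr<Float> d = a - b;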
// ----------------------------------------------------------------------------
// trans(a)
// ----------------------------------------------------------------------------
template<class T>
void
csr_rep<T,distributed>::build_transpose (csr_rep<T,distributed>& b) const
{
  //
  // first: assemble all _ext parts of the a matrix in b=trans(a)
  //
  asr<T> b_ext (col_ownership(), row_ownership());
  size_type first_i = row_ownership().first_index();
  const_iterator ext_ia = ext_begin();
  for (size_type i = 0, n = nrow(); i < n; i++) {
    size_type dis_i = first_i + i;
    for (const_data_iterator p = ext_ia[i], last_p = ext_ia[i+1]; p < last_p; p++) {
      size_type dis_j = jext2dis_j ((*p).first);
      const T& val = (*p).second;
      b_ext.dis_entry (dis_j, dis_i) += val;
    }
  }
  b_ext.dis_entry_assembly();
  b.build_from_asr (b_ext);
  //
  // second: add all _diag parts
  //
  base::build_transpose (b);
  //
  // third: update dis_nnz by adding all diag nnz
  //
  b._dis_nnz = mpi::all_reduce (comm(), b.nnz() + b._ext.nnz(), std::plus<size_type>());
  b._dis_ext_nnz = mpi::all_reduce (comm(), b._ext.nnz(), std::plus<size_type>());
  b._scatter_initialized = false;
}
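// ----------------------------------------------------------------------------
// illustration (not from the library): transposing the ext part amounts to
// re-assembling each entry (i, j, v) as (j, i, v); since the target row j
// usually lives on another process, the asr dis_entry/assembly machinery
// routes it to its owner. A local, communication-free analogue:
// ----------------------------------------------------------------------------
//   #include <vector>
//   #include <cstddef>
//   struct triplet { std::size_t i, j; double v; };
//   std::vector<triplet> transpose (const std::vector<triplet>& a) {
//     std::vector<triplet> b;
//     for (std::size_t k = 0; k < a.size(); k++) {
//       triplet t = { a[k].j, a[k].i, a[k].v }; // swap row and column
//       b.push_back (t);
//     }
//     return b;                                 // re-sorted at assembly time
//   }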
// ----------------------------------------------------------------------------
// set symmetry by check
// ----------------------------------------------------------------------------
template<class T>
void
csr_rep<T,distributed>::set_symmetry_by_check (const T& tol)
{
  if (dis_nrow() != dis_ncol()) {
    set_symmetry(false);
    return;
  }
  // check if |a-trans(a)| == 0 up to machine prec
  csr_rep<T,distributed> at, d;
  build_transpose (at);
  d.assign_add (*this, at, std::minus<T>());
  set_symmetry (d.max_abs() <= tol);
}
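// ----------------------------------------------------------------------------
// illustration (not from the library): the symmetry test computes
// max_{i,j} |a(i,j) - a(j,i)| via d = a - trans(a) and compares it to tol.
// A dense analogue of the same criterion:
// ----------------------------------------------------------------------------
//   #include <cmath>
//   #include <algorithm>
//   bool is_symmetric (const double a[3][3], double tol) {
//     double err = 0;
//     for (int i = 0; i < 3; i++)
//       for (int j = 0; j < 3; j++)
//         err = std::max (err, std::fabs (a[i][j] - a[j][i]));
//     return err <= tol;
//   }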
// ----------------------------------------------------------------------------
// expression c=a*b with a temporary c=*this
// ----------------------------------------------------------------------------
template<class T>
void
csr_rep<T,distributed>::assign_mult (
  const csr_rep<T,distributed>& a,
  const csr_rep<T,distributed>& b)
{
  // TODO: distributed csr*csr could be simplified
  // TODO: when a._jext2dis_j.size()==0 on all procs: no comms needed
  // TODO: and the sequential algo could be used
  check_macro (a.col_ownership() == b.row_ownership(),
    "incompatible csr([0:"<<a.nrow()<<"|"<<a.dis_nrow()<<"[x"
    <<"[0:"<<a.ncol()<<"|"<<a.dis_ncol()<<"[)"
    "*csr([0:"<<b.nrow()<<"|"<<b.dis_nrow()<<"[x"
    <<"[0:"<<b.ncol()<<"|"<<b.dis_ncol()<<"[)");
  //
  // 1) compress the non-empty column indexes of a into a new numbering: creates a_zip_j
  //
  size_type a_dia_ncol = a.col_ownership().size();
  size_type a_ext_ncol = a._jext2dis_j.size(); // number of external columns; compressed
  size_type first_a_dis_j = a.col_ownership().first_index();
  std::map<size_type,size_type> a_dis_j2a_zip_j;
  size_type a_zip_j = 0;
  for (size_type jext = 0; jext < a_ext_ncol && a._jext2dis_j [jext] < first_a_dis_j; jext++, a_zip_j++) {
    // dis_j below the local range
    size_type dis_j = a._jext2dis_j [jext];
    a_dis_j2a_zip_j [dis_j] = a_zip_j;
  }
  size_type jext_up = a_zip_j;
  for (size_type j = 0; j < a_dia_ncol; j++, a_zip_j++) {
    // dis_j inside the local range
    size_type dis_j = first_a_dis_j + j;
    a_dis_j2a_zip_j [dis_j] = a_zip_j;
  }
  for (size_type jext = jext_up; jext < a_ext_ncol; jext++, a_zip_j++) {
    // dis_j above the local range
    size_type dis_j = a._jext2dis_j [jext];
    a_dis_j2a_zip_j [dis_j] = a_zip_j;
  }
  size_type a_zip_ncol = a_dia_ncol + a_ext_ncol;
  //
  // 2) create a seq matrix b_zip = submatrix of b, taking the rows of b that match nonzero columns of a
  //    requires comms
  //
  typedef std::allocator<T> A; // TODO: use heap_alloc for asr
  A alloc;
  // 2.1) convert b in asr distributed format
  asr<T,distributed,A> b_asr (b, alloc);
  // 2.2) make available all external columns that are used in a
  b_asr.set_dis_indexes (a._jext2dis_j);
  // 2.3) convert local part of b_asr into b_asr_zip
  distributor a_zip_col_ownership (distributor::decide, b.comm(), a_zip_ncol);
  distributor b_zip_row_ownership = a_zip_col_ownership;
  asr<T,sequential,A> b_asr_zip (b_zip_row_ownership, b.col_ownership());
  size_type first_dis_j = b.row_ownership().first_index();
  size_type j = 0;
  for (typename asr<T,distributed,A>::const_iterator
    iter = b_asr.begin(),
    last = b_asr.end(); iter != last; ++iter, ++j) {
    typedef typename asr<T,distributed,A>::row_type row_type;
    size_type dis_j = first_dis_j + j;
    size_type zip_j = a_dis_j2a_zip_j [dis_j];
    const row_type& row = *iter;
    for (typename row_type::const_iterator
      row_iter = row.begin(),
      row_last = row.end(); row_iter != row_last; ++row_iter) {
      size_type dis_k = (*row_iter).first;
      const T& value = (*row_iter).second;
      b_asr_zip.semi_dis_entry (zip_j, dis_k) = value;
    }
  }
  // 2.4) convert external part of b_asr into b_asr_zip
  typedef typename asr<T,distributed,A>::scatter_map_type ext_row_type;
  const ext_row_type& b_ext_row_map = b_asr.get_dis_map_entries();
  for (typename ext_row_type::const_iterator
    iter = b_ext_row_map.begin(),
    last = b_ext_row_map.end(); iter != last; ++iter) {
    typedef typename asr<T,distributed,A>::row_type row_type;
    size_type dis_j = (*iter).first;
    size_type zip_j = a_dis_j2a_zip_j [dis_j];
    const row_type& row = (*iter).second;
    for (typename row_type::const_iterator
      row_iter = row.begin(),
      row_last = row.end(); row_iter != row_last; ++row_iter) {
      size_type dis_k = (*row_iter).first;
      const T& value = (*row_iter).second;
      b_asr_zip.semi_dis_entry (zip_j, dis_k) = value;
    }
  }
  b_asr_zip.dis_entry_assembly(); // recount nnz
  // 2.5) convert b_asr_zip into csr: b_zip
  csr<T,sequential> b_zip (b_asr_zip);
  //
  // 3) create a seq matrix a_zip = submatrix of a, compressing all local columns of a
  //    no comms required: a_zip := zip(a_dia, a_ext)
  //
  // 3.1) assemble a local asr matrix
  asr<T,sequential> a_asr_zip (a.row_ownership(), a_zip_col_ownership);
  size_type first_dis_i = a.row_ownership().first_index();
  const_iterator dia_ia = a.begin();
  const_iterator ext_ia = a.ext_begin();
  for (size_type i = 0, n = a.nrow(); i < n; ++i) {
    size_type dis_i = first_dis_i + i;
    for (const_data_iterator p = dia_ia[i], last_p = dia_ia[i+1]; p != last_p; ++p) {
      size_type j = (*p).first;
      const T& value = (*p).second;
      size_type dis_j = first_dis_j + j;
      size_type zip_j = a_dis_j2a_zip_j [dis_j];
      a_asr_zip.semi_dis_entry (i, zip_j) += value;
    }
    if (a.ext_nnz() == 0) continue;
    for (const_data_iterator p = ext_ia[i], last_p = ext_ia[i+1]; p != last_p; ++p) {
      size_type jext = (*p).first;
      const T& value = (*p).second;
      size_type dis_j = a._jext2dis_j [jext];
      size_type zip_j = a_dis_j2a_zip_j [dis_j];
      a_asr_zip.semi_dis_entry (i, zip_j) += value;
    }
  }
  a_asr_zip.dis_entry_assembly();
  // 3.2) convert to sequential csr
  csr<T,sequential> a_zip (a_asr_zip);
  //
  // 4) compute the product sequentially (no comms required)
  //
  csr<T,sequential> c_zip = a_zip*b_zip;
  //
  // 5) create the mpi matrix c by concatenating c_zip on all procs
  //    no comms required: c = (c_dia,c_ext) = dispatch(c_zip)
  // 5.1) assemble in a distributed asr
  asr<T,distributed> c_asr_zip (a.row_ownership(), b.col_ownership());
  const_iterator ic = c_zip.begin();
  for (size_type i = 0, n = c_zip.nrow(); i < n; ++i) {
    size_type dis_i = first_dis_i + i;
    for (const_data_iterator p = ic[i], last_p = ic[i+1]; p != last_p; ++p) {
      size_type dis_k = (*p).first;
      const T& value = (*p).second;
      c_asr_zip.semi_dis_entry (i, dis_k) += value;
    }
  }
  c_asr_zip.dis_entry_assembly();
  // 5.2) convert c_asr_zip into *this
  build_from_asr (c_asr_zip);
}
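// ----------------------------------------------------------------------------
// illustration (not from the library): the a_zip numbering used above packs
// the columns of a that are nonzero on this process: external columns below
// the local range, then the local block, then external columns above it, so
// zip indexes stay increasing with global indexes. A minimal sketch of the
// map construction:
// ----------------------------------------------------------------------------
//   #include <map>
//   #include <vector>
//   std::map<std::size_t,std::size_t>
//   make_zip (const std::vector<std::size_t>& jext2dis_j, // sorted
//             std::size_t first, std::size_t last) {      // local [first,last)
//     std::map<std::size_t,std::size_t> dis2zip;
//     std::size_t zip = 0, k = 0;
//     for (; k < jext2dis_j.size() && jext2dis_j[k] < first; k++)
//       dis2zip [jext2dis_j[k]] = zip++;                  // below local range
//     for (std::size_t j = first; j < last; j++)
//       dis2zip [j] = zip++;                              // local columns
//     for (; k < jext2dis_j.size(); k++)
//       dis2zip [jext2dis_j[k]] = zip++;                  // above local range
//     return dis2zip;
//   }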
// ----------------------------------------------------------------------------
// instantiation in library
// ----------------------------------------------------------------------------
#ifdef _RHEOLEF_DO_NOT_USE_HEAP_ALLOCATOR
#define _RHEOLEF_instanciate_class(T) \
template void csr_rep<T,distributed>::build_from_asr (const asr<T,distributed,std::allocator<T> >&);
#else // _RHEOLEF_DO_NOT_USE_HEAP_ALLOCATOR
#define _RHEOLEF_instanciate_class(T) \
template void csr_rep<T,distributed>::build_from_asr (const asr<T,distributed,std::allocator<T> >&); \
template void csr_rep<T,distributed>::build_from_asr (const asr<T,distributed,heap_allocator<T> >&);
#endif // _RHEOLEF_DO_NOT_USE_HEAP_ALLOCATOR

#define _RHEOLEF_istanciate(T) \
template class csr_rep<T,distributed>; \
template void csr_rep<T,distributed>::assign_add ( \
  const csr_rep<T,distributed>&, \
  const csr_rep<T,distributed>&, \
  std::plus<T>); \
template void csr_rep<T,distributed>::assign_add ( \
  const csr_rep<T,distributed>&, \
  const csr_rep<T,distributed>&, \
  std::minus<T>); \
_RHEOLEF_instanciate_class(T)

_RHEOLEF_istanciate(Float)

} // namespace rheolef
# endif // _RHEOLEF_HAVE_MPI