00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013 bool mdp_write_double_as_float(FILE *fp,
00014 void* data,
00015 mdp_int psize,
00016 mdp_int header_size,
00017 mdp_int position,
00018 const mdp_lattice &lattice) {
00019 double *p=(double*) data;
00020 float *q=(float*) malloc(psize/2);
00021 for(uint i=0; i<psize/sizeof(double); i++) q[i]=p[i];
00022 if(fseek(fp, position*psize/2+header_size, SEEK_SET) ||
00023 fwrite(q, psize/2,1, fp)!=1) return false;
00024 free(q);
00025 return true;
00026 }
00027
00028 bool mdp_read_double_as_float(FILE *fp,
00029 void* data,
00030 mdp_int psize,
00031 mdp_int header_size,
00032 mdp_int position,
00033 const mdp_lattice &lattice) {
00034 double *p=(double*) data;
00035 float *q=(float*) malloc(psize/2);
00036 if(fseek(fp, position*psize/2+header_size, SEEK_SET) ||
00037 fread(q, psize/2,1, fp)!=1) return false;
00038 for(uint i=0; i<psize/sizeof(double); i++) p[i]=q[i];
00039 free(q);
00040 return true;
00041 }
00042
00043 bool mdp_write_float_as_double(FILE *fp,
00044 void* data,
00045 mdp_int psize,
00046 mdp_int header_size,
00047 mdp_int position,
00048 const mdp_lattice &lattice) {
00049 float *p=(float*) data;
00050 double *q=(double*) malloc(psize*2);
00051 for(uint i=0; i<psize/sizeof(float); i++) q[i]=p[i];
00052 if(fseek(fp, position*psize*2+header_size, SEEK_SET) ||
00053 fwrite(q, psize*2,1, fp)!=1) return false;
00054 free(q);
00055 return true;
00056 }
00057
00058 bool mdp_read_float_as_double(FILE *fp,
00059 void* data,
00060 mdp_int psize,
00061 mdp_int header_size,
00062 mdp_int position,
00063 const mdp_lattice &lattice) {
00064 float *p=(float*) data;
00065 double *q=(double*) malloc(psize*2);
00066 if(fseek(fp, position*psize*2+header_size, SEEK_SET) ||
00067 fread(q, psize*2,1, fp)!=1) return false;
00068 for(uint i=0; i<psize/sizeof(float); i++) p[i]=q[i];
00069 free(q);
00070 return true;
00071 }
00072
00085 class mdp_complex_field : public mdp_field<mdp_complex> {
00086 public:
00087 mdp_complex_field() {}
00088 mdp_complex_field(mdp_lattice& lattice, int n=1) {
00089 allocate_field(lattice,n);
00090 }
00091 mdp_complex_field(const mdp_complex_field &other) : mdp_field<mdp_complex>(other) {
00092 }
00093 inline void operator= (const mdp_complex_field &psi) {
00094 if(&lattice() != &psi.lattice() ||
00095 size!=psi.size ||
00096 field_components!=psi.field_components)
00097 error("mdp_field: operator=() impatible fields");
00098
00099 mdp_int i=0;
00100
00101 #if defined(SSE2) && defined(USE_DOUBLE_PRECISION) && !defined(NO_SSE2_LINALG)
00102 _sse_double* r=(_sse_double*) m;
00103 _sse_double* s=(_sse_double*) psi.m;
00104
00105 for(; i<size-7; i+=8, r+=8, s+=8) {
00106 _sse_double_prefetch_16(s+8);
00107 _sse_double_copy_16(r,s);
00108 }
00109 #endif
00110
00111 for(; i<size; i++) m[i]=psi.m[i];
00112 }
00113
00114 inline void operator*=(const mdp_complex alpha) {
00115 mdp_int i_min=physical_local_start(EVENODD);
00116 mdp_int i_max=physical_local_stop(EVENODD);
00117 mdp_int i=i_min;
00118 for(; i<i_max; i++) m[i]*=alpha;
00119 }
00120 inline void operator/=(const mdp_complex alpha) {
00121 (*this)*=(1.0/alpha);
00122 }
00123
00124 inline void operator*=(const mdp_real alpha) {
00125 mdp_int i_min=physical_local_start(EVENODD);
00126 mdp_int i_max=physical_local_stop(EVENODD);
00127 mdp_int i=i_min;
00128
00129 if(alpha==1) return;
00130
00131 #if defined(SSE2) && defined(USE_DOUBLE_PRECISION) && !defined(NO_SSE2_LINALG)
00132 static _sse_double c ALIGN16;
00133 _sse_double* r=(_sse_double*) physical_address(i_min);
00134
00135 _sse_check_alignment(&c,0xf);
00136
00137 c.c1=c.c2=alpha;
00138 for(; i<i_max-7; i+=8, r+=8) {
00139 _sse_double_prefetch_16(r+8);
00140 _sse_double_multiply_16(r,c,r);
00141 }
00142 #endif
00143
00144 for(; i<i_max; i++) m[i]*=alpha;
00145 }
00146
00147 inline void operator/=(const mdp_real alpha) {
00148 (*this)*=(1.0/alpha);
00149 }
00150
00151
00152 inline void operator+=(mdp_complex_field &psi) {
00153 mdp_int i_min=psi.physical_local_start(EVENODD);
00154 mdp_int i_max=psi.physical_local_stop(EVENODD);
00155 mdp_int i=i_min;
00156
00157 #if defined(SSE2) && defined(USE_DOUBLE_PRECISION) && !defined(NO_SSE2_LINALG)
00158 _sse_double* r=(_sse_double*) physical_address(i_min);
00159 _sse_double* s=(_sse_double*) psi.physical_address(i_min);
00160
00161 for(; i<i_max-7; i+=8, r+=8, s+=8) {
00162 _sse_double_prefetch_16(s+8);
00163 _sse_double_add_16(r,s);
00164 }
00165 #endif
00166
00167 for(; i<i_max; i++) m[i]+=psi.m[i];
00168 }
00169
00170 inline void operator-=(mdp_complex_field &psi) {
00171 mdp_int i_min=psi.physical_local_start(EVENODD);
00172 mdp_int i_max=psi.physical_local_stop(EVENODD);
00173 mdp_int i=i_min;
00174
00175 #if defined(SSE2) && defined(USE_DOUBLE_PRECISION) && !defined(NO_SSE2_LINALG)
00176 _sse_double* r=(_sse_double*) physical_address(i_min);
00177 _sse_double* s=(_sse_double*) psi.physical_address(i_min);
00178
00179 for(; i<i_max-7; i+=8, r+=8, s+=8) {
00180 _sse_double_prefetch_16(s+8);
00181 _sse_double_sub_16(r,s);
00182 }
00183 #endif
00184
00185 for(; i<i_max; i++) m[i]-=psi.m[i];
00186 }
00187
00188
00189 inline friend mdp_real norm_square(mdp_complex_field &psi,
00190 int parity=EVENODD) {
00191 double n2=0;
00192 mdp_int i_min=psi.physical_local_start(parity);
00193 mdp_int i_max=psi.physical_local_stop(parity);
00194 mdp_int i=i_min;
00195
00196 #if defined(SSE2) && defined(USE_DOUBLE_PRECISION) && !defined(NO_SSE2_LINALG)
00197 static _sse_double c ALIGN16;
00198 _sse_double* r=(_sse_double*) psi.physical_address(i_min);
00199
00200 _sse_check_alignment(&c,0xf);
00201
00202 c.c1=c.c2=0;
00203 for(; i<i_max-7; i+=8, r+=8) {
00204 _sse_double_prefetch_16(r+8);
00205 _sse_double_add_norm_square_16(r,c);
00206 }
00207 n2+=c.c1+c.c2;
00208 #endif
00209
00210 for(; i<i_max; i++)
00211 n2+=abs2(psi[i]);
00212 mpi.add(n2);
00213 return n2;
00214 }
00215
00216 inline friend mdp_complex scalar_product(mdp_complex_field &psi,
00217 mdp_complex_field &chi,
00218 int parity=EVENODD) {
00219 mdp_complex n2=0;
00220 mdp_int i_min=psi.physical_local_start(parity);
00221 mdp_int i_max=psi.physical_local_stop(parity);
00222 mdp_int i=i_min;
00223
00224 #if defined(SSE2) && defined(USE_DOUBLE_PRECISION) && !defined(NO_SSE2_LINALG)
00225 static _sse_double c,d ALIGN16;
00226 _sse_double* r=(_sse_double*) psi.physical_address(i_min);
00227 _sse_double* s=(_sse_double*) chi.physical_address(i_min);
00228
00229 _sse_check_alignment(&c,0xf);
00230
00231 c.c1=c.c2=0;
00232 d.c1=d.c2=0;
00233 for(; i<i_max-7; i+=8, r+=8, s+=8) {
00234 _sse_double_prefetch_16(r+8);
00235 _sse_double_prefetch_16(s+8);
00236 _sse_double_add_real_scalar_product_16(r,s,c);
00237 _sse_double_add_imag_scalar_product_16(r,s,d);
00238 }
00239 n2+=mdp_complex(c.c1+c.c2,d.c2-d.c1);
00240 #endif
00241
00242 for(;i<i_max; i++)
00243 n2+=conj(psi[i])*chi[i];
00244 mpi.add(n2);
00245
00246 return n2;
00247 }
00248
00249 inline friend mdp_real real_scalar_product(mdp_complex_field &psi,
00250 mdp_complex_field &chi,
00251 int parity=EVENODD) {
00252
00253 double n2=0;
00254 mdp_int i_min=psi.physical_local_start(parity);
00255 mdp_int i_max=psi.physical_local_stop(parity);
00256 mdp_int i=i_min;
00257
00258 #if defined(SSE2) && defined(USE_DOUBLE_PRECISION) && !defined(NO_SSE2_LINALG)
00259 static _sse_double c ALIGN16;
00260 _sse_double* r=(_sse_double*) psi.physical_address(i_min);
00261 _sse_double* s=(_sse_double*) chi.physical_address(i_min);
00262
00263 _sse_check_alignment(&c,0xf);
00264
00265 c.c1=c.c2=0;
00266 for(; i<i_max-7; i+=8, r+=8, s+=8) {
00267 _sse_double_prefetch_16(r+8);
00268 _sse_double_prefetch_16(s+8);
00269 _sse_double_add_real_scalar_product_16(r,s,c);
00270 }
00271 n2+=c.c1+c.c2;
00272 #endif
00273
00274 for(;i<i_max; i++)
00275 n2+=
00276 real(chi[i])*real(psi[i])+
00277 imag(chi[i])*imag(psi[i]);
00278
00279 mpi.add(n2);
00280 return n2;
00281 }
00282
00283 inline friend mdp_real imag_scalar_product(mdp_complex_field &psi,
00284 mdp_complex_field &chi,
00285 int parity=EVENODD) {
00286 double n2=0;
00287 mdp_int i_min=psi.physical_local_start(parity);
00288 mdp_int i_max=psi.physical_local_stop(parity);
00289 mdp_int i=i_min;
00290
00291 #if defined(SSE2) && defined(USE_DOUBLE_PRECISION) && !defined(NO_SSE2_LINALG)
00292 static _sse_double c ALIGN16;
00293 _sse_double* r=(_sse_double*) psi.physical_address(i_min);
00294 _sse_double* s=(_sse_double*) chi.physical_address(i_min);
00295
00296 _sse_check_alignment(&c,0xf);
00297
00298 c.c1=c.c2=0;
00299 for(; i<i_max-7; i+=8, r+=8, s+=8) {
00300 _sse_double_prefetch_16(r+8);
00301 _sse_double_prefetch_16(s+8);
00302 _sse_double_add_imag_scalar_product_16(r,s,c);
00303 }
00304 n2+=c.c2-c.c1;
00305 #endif
00306
00307 for(;i<i_max; i++)
00308 n2+=
00309 real(psi[i])*imag(chi[i])+
00310 imag(psi[i])*real(chi[i]);
00311 mpi.add(n2);
00312 return n2;
00313 }
00314
00315 inline friend void mdp_add_scaled_field(mdp_complex_field &psi,
00316 mdp_real alpha,
00317 mdp_complex_field &chi,
00318 int parity = EVENODD) {
00319 mdp_int i_min=psi.physical_local_start(parity);
00320 mdp_int i_max=psi.physical_local_stop(parity);
00321 mdp_int i=i_min;
00322
00323 #if defined(SSE2) && defined(USE_DOUBLE_PRECISION) && !defined(NO_SSE2_LINALG)
00324 static _sse_double c ALIGN16;
00325 _sse_double* r=(_sse_double*) psi.physical_address(i_min);
00326 _sse_double* s=(_sse_double*) chi.physical_address(i_min);
00327
00328 _sse_check_alignment(&c,0xf);
00329
00330 c.c1=c.c2=alpha;
00331 for(i=0; i<i_max-7; i+=8, r+=8, s+=8) {
00332 _sse_double_prefetch_16(r+8);
00333 _sse_double_prefetch_16(s+8);
00334 _sse_double_add_multiply_16(r,c,s);
00335 }
00336
00337 #endif
00338
00339 for(;i<i_max; i++)
00340 psi[i]+=alpha*chi[i];
00341 }
00342
00343 inline friend void mdp_add_scaled_field(mdp_complex_field &psi,
00344 mdp_complex alpha,
00345 mdp_complex_field &chi,
00346 int parity = EVENODD) {
00347 mdp_int i_min=psi.physical_local_start(parity);
00348 mdp_int i_max=psi.physical_local_stop(parity);
00349 mdp_int i=i_min;
00350
00351 for(;i<i_max; i++)
00352 psi[i]+=alpha*chi[i];
00353 }
00354
00355 inline friend mdp_complex operator* (mdp_complex_field &psi,
00356 mdp_complex_field &chi) {
00357 return scalar_product(psi, chi);
00358 }
00359
00360
00361 inline friend mdp_real relative_residue(mdp_complex_field& p,
00362 mdp_complex_field& q,
00363 int parity=EVENODD) {
00364 register double residue=0, num=0, den=0;
00365 mdp_int i_min=p.physical_local_start(parity);
00366 register mdp_int i_max=q.physical_local_stop(parity);
00367 register mdp_int i=i_min;
00368
00369 for(;i<i_max;) {
00370 num+=abs2(p[i]);
00371 den+=abs2(q[i]);
00372 if(++i % p.size_per_site()==0) {
00373 residue+=(den==0)?1.0:(num/den);
00374 num=den=0;
00375 }
00376 }
00377 mpi.add(residue);
00378 return sqrt(residue/p.lattice().global_volume());
00379 }
00380
00381 bool save_as_float(string filename,
00382 int processIO=0,
00383 mdp_int max_buffer_size=1024,
00384 bool load_header=true,
00385 mdp_int skip_bytes=0) {
00386 #if defined(USE_DOUBLE_PRECISION)
00387 header.bytes_per_site/=2;
00388 save(filename,processIO, max_buffer_size,load_header,skip_bytes,
00389 mdp_write_double_as_float);
00390 header.bytes_per_site*=2;
00391 #else
00392 save(filename,processIO, max_buffer_size,load_header,skip_bytes,0);
00393 #endif
00394 return true;
00395 }
00396
00397 bool load_as_float(string filename,
00398 int processIO=0,
00399 mdp_int max_buffer_size=1024,
00400 bool load_header=true,
00401 mdp_int skip_bytes=0) {
00402
00403 #if defined(USE_DOUBLE_PRECISION)
00404 header.bytes_per_site/=2;
00405 load(filename,processIO, max_buffer_size,load_header,skip_bytes,
00406 mdp_read_double_as_float, true);
00407 header.bytes_per_site*=2;
00408 #else
00409 load(filename,processIO, max_buffer_size,load_header,skip_bytes,0,true);
00410 #endif
00411 return true;
00412 }
00413
00414
00415 bool load_as_double(string filename,
00416 int processIO=0,
00417 mdp_int max_buffer_size=1024,
00418 bool load_header=true,
00419 mdp_int skip_bytes=0) {
00420 #if !defined(USE_DOUBLE_PRECISION)
00421 header.bytes_per_site*=2;
00422 load(filename,processIO, max_buffer_size,load_header,skip_bytes,
00423 mdp_read_float_as_double, true);
00424 header.bytes_per_site/=2;
00425 #else
00426 load(filename,processIO, max_buffer_size,load_header,skip_bytes,0,true);
00427 #endif
00428 return true;
00429 }
00430
00431 bool save_as_double(string filename,
00432 int processIO=0,
00433 mdp_int max_buffer_size=1024,
00434 bool load_header=true,
00435 mdp_int skip_bytes=0) {
00436 #if !defined(USE_DOUBLE_PRECISION)
00437 header.bytes_per_site*=2;
00438 save(filename,processIO, max_buffer_size,load_header,skip_bytes,
00439 mdp_write_float_as_double);
00440 header.bytes_per_site/=2;
00441 #else
00442 save(filename,processIO, max_buffer_size,load_header,skip_bytes,0);
00443 #endif
00444 return true;
00445 }
00446 };
00447
00448