/*
 * 
 * This source code is part of 
 *   MARBLE (MoleculAR simulation package for BiomoLEcules)
 * 
 * Written by Mitsunori Ikeguchi
 * Copyright (c) 2012 Yokohama City University
 *  
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version 2
 * of the License, or (at your option) any later version.
 * 
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 * 
 */

#define NONBOND_C

#include <stdio.h>
#include <math.h>
#include <stdlib.h>

#include "misc.h"
#include "atom.h"
#include "boundary.h"
#include "linked_cell.h"
#include "nonbond.h"

#ifdef _OPENMP
#include "omp.h"
#endif

/*
 * Put a NONBOND_LIST into its default state.
 *
 * No pair list and no reference coordinates are allocated yet, the
 * linked-cell list method with potential-switch vdW (NV_PSW) and
 * force-shift electrostatics (NE_FSH) is selected, and the cutoffs are
 * set through NONBOND_LIST_set_cutoff() to 9.0 (smoothing width 0.5,
 * list buffer 1.0) with all short-range values at 0.0.
 *
 * nl : list object to initialise
 * bc : boundary description, forwarded to NONBOND_LIST_set_cutoff()
 */
void NONBOND_LIST_init(NONBOND_LIST *nl, BOUNDARY *bc)
{
  /* group_flag defaults to "on" only in HG_MODE builds */
#ifdef HG_MODE
  const int default_group_flag = 1;
#else
  const int default_group_flag = 0;
#endif

  /* pair-list bookkeeping: nothing stored, nothing allocated */
  nl->n_list       = 0;
  nl->n_alloc      = 0;
  nl->n_alloc_atom = 0;
  nl->x_at_update  = NULL;

  /* growth policy of the pair-list buffer */
  nl->alloc_unit  = 10000;
  nl->alloc_ratio = 0.1;

  /* default algorithms */
  nl->nl_method   = NL_LINKED_CELL;
  nl->vdw_method  = NV_PSW;
  nl->elec_method = NE_FSH;

  nl->outside_flag = 1;
  nl->group_flag   = default_group_flag;

  NONBOND_LIST_set_cutoff(nl, bc, 9.0, 0.5, 1.0, 0.0, 0.0, 0.0);
}

/*
 * Store the group flag in the list object and, when the flag is non-zero,
 * let ATOM_DATA_set_hydrogen_group() prepare the atom data.
 */
void NONBOND_LIST_set_group_flag(NONBOND_LIST *nl, ATOM_DATA *ad, int flag)
{
  nl->group_flag = flag;

  if (flag == 0)
    return;

  ATOM_DATA_set_hydrogen_group(ad);
}

/*
 * Configure the nonbond list from user-level parameters and log the
 * resulting configuration with lprintf().
 *
 * nl            : list object to configure
 * bc            : boundary; under PERIODIC_BOUNDARY every bc->reclen[]
 *                 entry must be at least 2*cutoff, otherwise an error is
 *                 logged and marble_exit(1) is called
 * ad            : atom data, only forwarded to the long range correction
 * nl_method, vdw_method, elec_method : algorithm selectors stored as given
 * cutoff        : cutoff length (rl_off)
 * smooth_margin : rl_on = cutoff - smooth_margin
 * list_margin   : rl_list = cutoff + list_margin
 * group_margin  : added to the cell division length when nl->group_flag is set
 * cell_margin   : added to the cell division length
 * update_step   : list update interval; values <= 0 are reported as "Automatic"
 * vir_cor       : non-zero calls BOUNDARY_long_range_correction_init()
 *
 * NV_TRN is reported in the log but is then carried out as NV_PSW with
 * rl_on placed 1.0 beyond rl_off.
 */
void NONBOND_LIST_set_param(NONBOND_LIST *nl, BOUNDARY *bc, ATOM_DATA *ad,
			    int nl_method, int vdw_method, int elec_method,
			    double cutoff, double smooth_margin, double list_margin, 
			    double group_margin, double cell_margin, int update_step, int vir_cor)
{
  int axis;
  int box_too_small = 0;
  const char *lrc_state;

  /* reject a cutoff that does not fit twice into every box edge */
  if (bc->type == PERIODIC_BOUNDARY) {
    for (axis = 0; axis < 3; axis++) {
      if (bc->reclen[axis] < cutoff*2.0)
        box_too_small = 1;
    }
    if (box_too_small) {
      lprintf("ERROR: cutoff value must be less than half of edges of the system box\n");
      marble_exit(1);
    }
  }

  /* algorithm selection */
  nl->nl_method   = nl_method;
  nl->vdw_method  = vdw_method;
  nl->elec_method = elec_method;
  nl->update_step = update_step;

  if (vir_cor != 0)
    BOUNDARY_long_range_correction_init(bc, ad, cutoff);

  /* cutoff, smoothing start and list length */
  nl->rl_off  = cutoff;
  nl->rl_on   = cutoff - smooth_margin;
  nl->rl_off2 = cutoff*cutoff;
  nl->rl_list = cutoff + list_margin;

  /* cell division length */
  nl->group_margin = group_margin;
  nl->cell_margin  = cell_margin;
  nl->cell_div     = nl->rl_list + nl->cell_margin;
  if (nl->group_flag)
    nl->cell_div += group_margin;

  /* report */
  lprintf("Nonbond Configuration:\n");
  lprintf("  Method for Nonbond Pair List:  %s\n",
          get_name_sel_data(_sel_NONBOND_LIST_METHOD, nl->nl_method));
  lprintf("  Method for van der Waals Smoothing: %s\n",
          get_name_sel_data(_sel_NONBOND_VDW_METHOD, nl->vdw_method));
  lprintf("  Method for Electrostatic Calculation: %s\n",
          get_name_sel_data(_sel_NONBOND_ELEC_METHOD, nl->elec_method));

  lprintf("  Cutoff Length            = %5.1f\n", nl->rl_off);
  if (nl->vdw_method != NV_TRN) {
    lprintf("  Smoothing Start Length   = %5.1f (Margin %.1f)\n", nl->rl_on,
            smooth_margin);
  }
  lprintf("  Length for Nonbond List  = %5.1f (Margin %.1f)\n", nl->rl_list, list_margin);
  if (nl->group_flag)
    lprintf("  Group Margin             = %5.1f\n", nl->group_margin);
  lprintf("  Cell Margin              = %5.1f\n", nl->cell_margin);
  lprintf("  Length for Cell Division = %5.1f\n", nl->cell_div);

  lprintf("  Nonbond List Update:  ");
  if (nl->update_step <= 0) {
    lprintf("Automatic\n");
  } else {
    lprintf("%d Step(s)\n", nl->update_step);
  }

  lrc_state = (vir_cor) ? "on" : "off";
  lprintf("  Periodic Boundary: VDW Long Range Correction: %s\n\n",
          lrc_state);

  /* truncation is carried out as a potential switch that never starts */
  if (nl->vdw_method == NV_TRN) {
    nl->vdw_method = NV_PSW;
    nl->rl_on = nl->rl_off + 1.0;
  }
}
			

/*
 * Set the long-range (rl) and short-range (rs) cutoff windows.
 *
 * rl, rl_sm, rl_list_buf : outer cutoff, its smoothing width, and the extra
 *                          buffer added for the pair list
 * rs, rs_sm, rs_list_buf : inner cutoff, its smoothing width, and the buffer
 *                          subtracted for the pair list
 *
 * The short-range values are clamped at 0.0 after rs_list has been derived
 * from the unclamped rs_on.  The requested rl_list / rl_off are remembered
 * in rl_list0 / rl_off0, the cell margin is cleared, and the values are
 * finally adjusted by NONBOND_LIST_reset_cutoff_from_boundary().
 */
void NONBOND_LIST_set_cutoff(NONBOND_LIST *nl, BOUNDARY *bc,
			     double rl, double rl_sm, double rl_list_buf,
			     double rs, double rs_sm, double rs_list_buf)
{
  /* outer window */
  nl->rl_off  = rl;
  nl->rl_on   = rl - rl_sm;
  nl->rl_list = nl->rl_off + rl_list_buf;
  nl->rl_off2 = nl->rl_off*nl->rl_off;

  /* requested (pre-boundary) values */
  nl->rl_list0 = nl->rl_list;
  nl->rl_off0  = nl->rl_off;

  /* inner window; rs_list uses rs_on before clamping */
  nl->rs_off  = rs;
  nl->rs_on   = rs - rs_sm;
  nl->rs_list = nl->rs_on - rs_list_buf;

  nl->rs_on   = (nl->rs_on   < 0.0) ? 0.0 : nl->rs_on;
  nl->rs_off  = (nl->rs_off  < 0.0) ? 0.0 : nl->rs_off;
  nl->rs_list = (nl->rs_list < 0.0) ? 0.0 : nl->rs_list;

  nl->rs_on2 = nl->rs_on*nl->rs_on;

  nl->cell_margin = 0.0;

  NONBOND_LIST_reset_cutoff_from_boundary(nl,bc);
}

/*
 * Re-derive the working cutoffs from the requested ones (rl_list0, rl_off0)
 * and the current box.  Does nothing unless bc->type is PERIODIC_BOUNDARY.
 *
 * The limit is half of the shortest box edge minus 0.01.  rl_list is the
 * requested list length capped at that limit.  rl_off is capped at the
 * limit too, or restored to the requested value when it currently differs
 * from it; in both cases the smoothing width (rl_off - rl_on) is kept and a
 * warning is logged.
 */
void NONBOND_LIST_reset_cutoff_from_boundary(NONBOND_LIST *nl,BOUNDARY *bc)
{
  double limit, target;
  int axis;

  if (bc->type != PERIODIC_BOUNDARY)
    return;

  /* half of the shortest edge, minus a small safety gap */
  limit = bc->box[0]*0.5;
  for (axis = 1; axis < 3; axis++) {
    if (limit > bc->box[axis]*0.5)
      limit = bc->box[axis]*0.5;
  }
  limit -= 0.01;

  nl->rl_list = (nl->rl_list0 > limit) ? limit : nl->rl_list0;

  if (nl->rl_off0 > limit) {
    target = limit;
    lprintf("WARNING cutoff length exceeds half length of box!\n");
  } else if (nl->rl_off != nl->rl_off0) {
    target = nl->rl_off0;
    lprintf("WARNING cutoff length is reset to user-defined value\n");
  } else {
    return;
  }

  /* move the window to the new cutoff, keeping its smoothing width */
  nl->rl_on   = target - (nl->rl_off - nl->rl_on);
  nl->rl_off  = target;
  nl->rl_off2 = nl->rl_off*nl->rl_off;
  lprintf("resetting length to %f (smoothing start: %f)\n",
          nl->rl_off, nl->rl_on);
}

/*
 * Report the current outer cutoff settings.
 *
 * *rl      receives the cutoff length (rl_off)
 * *rl_sm   receives the smoothing width (rl_off - rl_on)
 * *rl_list receives the list buffer (rl_list - rl_off)
 */
void NONBOND_LIST_get_rl(NONBOND_LIST *nl,
			 double *rl, double *rl_sm, double *rl_list)
{
  const double cutoff = nl->rl_off;

  *rl      = cutoff;
  *rl_sm   = cutoff - nl->rl_on;
  *rl_list = nl->rl_list - cutoff;
}

/*
 * Decide whether the nonbond pair list has to be rebuilt at this step.
 *
 * With a positive nl->update_step the list is rebuilt every update_step
 * steps.  Otherwise the decision is automatic: the displacement of every
 * atom since the last rebuild is measured (minimum image under
 * PERIODIC_BOUNDARY) and a rebuild is requested once the sum of the two
 * largest displacements reaches the list buffer rl_list - rl_off.  A rebuild
 * is also requested whenever the reference coordinate array had to grow.
 * In automatic mode the current coordinates are stored as the new reference
 * each time a rebuild is requested.
 *
 * Returns 1 when the list should be rebuilt, 0 otherwise.
 */
int NONBOND_LIST_check_update(NONBOND_LIST *nl, ATOM_DATA *ad,
			      BOUNDARY *bc, int step)
{
  double largest2, second2;   /* two largest squared displacements */
  double d[3], r2;
  int ia, axis;
  int need_update;

  /* fixed-interval mode */
  if (nl->update_step > 0)
    return (step % nl->update_step == 0) ? 1 : 0;

  if (ad->natom > nl->n_alloc_atom) {
    /* reference array too small: grow it and force a rebuild */
    nl->x_at_update = erealloc("NONBOND_LIST_check_update",
                               nl->x_at_update, sizeof(VEC)*ad->natom);
    nl->n_alloc_atom = ad->natom;
    need_update = 1;
  } else {
    largest2 = second2 = 0.0;
    for (ia = 0; ia < ad->natom; ia++) {
      d[0] = ad->x[ia].x - nl->x_at_update[ia].x;
      d[1] = ad->x[ia].y - nl->x_at_update[ia].y;
      d[2] = ad->x[ia].z - nl->x_at_update[ia].z;
      if (bc->type == PERIODIC_BOUNDARY) {
        for (axis = 0; axis < 3; axis++) {
          if (d[axis] > bc->boxh[axis])  d[axis] -= bc->box[axis];
          if (d[axis] < -bc->boxh[axis]) d[axis] += bc->box[axis];
        }
      }
      r2 = d[0]*d[0]+d[1]*d[1]+d[2]*d[2];

      if (largest2 < r2) {
        second2  = largest2;
        largest2 = r2;
      } else if (second2 < r2) {
        second2 = r2;
      }
    }
    need_update = (sqrt(largest2)+sqrt(second2) >= nl->rl_list - nl->rl_off);
  }

  if (!need_update)
    return 0;

  /* remember where the atoms were when this rebuild was requested */
  for (ia = 0; ia < ad->natom; ia++) {
    nl->x_at_update[ia] = ad->x[ia];
  }
  return 1;
}

/*
 * First-time construction of the nonbond pair list with the method selected
 * in nl->nl_method.
 *
 *  NL_ATOM_BASED    : make_nonbond_list_atom_based()
 *  NL_RESIDUE_BASED : set center atoms, count the pairs, allocate, then fill
 *  NL_LINKED_CELL   : set up the cells, assign atoms, build the cell pair
 *                     list, allocate and fill the atom pair list
 *  NL_FMM / others  : nothing is built
 *
 * The elapsed time is added to nl->time_list for the three active methods.
 * In MPI_RDMD builds the list is afterwards split into tasks.
 */
void NONBOND_LIST_setup(NONBOND_LIST *nl, ATOM_DATA *ad,
			LINKED_CELL *lc,  BOUNDARY *bc)
{
  int method;

  dtime();
  method = nl->nl_method;

  if (method == NL_ATOM_BASED) {
    make_nonbond_list_atom_based(nl, ad,  bc);
    add_dtime(&nl->time_list);
  } else if (method == NL_RESIDUE_BASED) {
    ATOM_DATA_set_center_atom_in_residue(ad);
    /* pass 1 counts the pairs, pass 2 stores them */
    make_nonbond_list_residue_based(nl, ad,  bc, 1);
    alloc_nonbond_list(nl);
    make_nonbond_list_residue_based(nl, ad,  bc, 0);
    add_dtime(&nl->time_list);
  } else if (method == NL_LINKED_CELL) {
    LINKED_CELL_setup(lc, nl, ad, bc);
    LINKED_CELL_assign_atom(lc, ad, bc);
    LINKED_CELL_make_cell_pairlist(lc, bc);
    LINKED_CELL_calc_tr_x(lc, ad, bc);
    LINKED_CELL_alloc_nonbond_list(lc, nl, ad, bc);
    LINKED_CELL_make_nonbond_list(lc, nl, ad, bc, 0);
    add_dtime(&nl->time_list);
  }
  /* NL_FMM: the FMM list construction is disabled, nothing to do */

#ifdef MPI_RDMD
  assign_tasks_nonbond_list(nl);
#endif
}

/*
 * Rebuild the nonbond pair list with the method selected in nl->nl_method.
 *
 *  NL_ATOM_BASED    : make_nonbond_list_atom_based()
 *  NL_RESIDUE_BASED : make_nonbond_list_residue_based() in fill mode
 *  NL_LINKED_CELL   : reassign atoms to cells and refill the list; when the
 *                     fill routine returns a negative value the list is
 *                     reallocated and filled once more
 *  NL_FMM / others  : nothing is built
 *
 * The elapsed time is added to nl->time_list for the three active methods.
 * In MPI_RDMD builds the list is afterwards split into tasks.
 */
void make_nonbond_list(NONBOND_LIST *nl, ATOM_DATA *ad,
		       LINKED_CELL *lc, BOUNDARY *bc)
{
  int method;

  dtime();
  method = nl->nl_method;

  if (method == NL_ATOM_BASED) {
    make_nonbond_list_atom_based(nl, ad,  bc);
    add_dtime(&nl->time_list);
  } else if (method == NL_RESIDUE_BASED) {
    make_nonbond_list_residue_based(nl, ad, bc, 0);
    add_dtime(&nl->time_list);
  } else if (method == NL_LINKED_CELL) {
    int status;

    LINKED_CELL_assign_atom(lc, ad, bc);
    LINKED_CELL_calc_tr_x(lc, ad, bc);
    status = LINKED_CELL_make_nonbond_list(lc, nl, ad, bc, 0);
    if (status < 0) {
      /* first attempt failed: reallocate and try again */
      LINKED_CELL_alloc_nonbond_list(lc, nl, ad, bc);
      LINKED_CELL_make_nonbond_list(lc, nl, ad, bc, 0);
    }
    add_dtime(&nl->time_list);
  }
  /* NL_FMM: the FMM list construction is disabled, nothing to do */

#ifdef MPI_RDMD
  assign_tasks_nonbond_list(nl);
#endif
}

/*
 * Build (or, with only_count != 0, merely count) the nonbond pair list
 * using a residue-based cutoff.
 *
 * Two residues interact when the distance between their center atoms
 * (ad->r[].center_atom; minimum image under PERIODIC_BOUNDARY) does not
 * exceed nl->rl_list.  For every accepted residue pair all atom pairs
 * (i in ires, j in jres) are listed except those where j appears in the
 * exclusion list of i; inside a single residue each pair is listed once
 * (i < j).  When the pair of the two center atoms is itself listed it is
 * swapped to the front of that residue pair's run of entries.
 *
 * nl         : pair list; n_list receives the number of pairs, and in fill
 *              mode ij_list receives the pairs (grown with realloc on demand)
 * ad         : atom data; ad->ex[].id is overwritten as scratch space
 * bc         : boundary description
 * only_count : non-zero counts pairs without touching ij_list
 *
 * Changes relative to the previous implementation:
 *  - the outer residue loop now includes the last residue, so its
 *    intra-residue pairs are no longer dropped;
 *  - the buffer always grows by at least one entry (alloc_unit may be 0
 *    for short lists, which used to lead to a write past the buffer);
 *  - a failed realloc keeps the old buffer; the function then logs the
 *    error, stores the number of pairs written so far in nl->n_list and
 *    returns.
 */
void make_nonbond_list_residue_based(NONBOND_LIST *nl, ATOM_DATA *ad, 
				     BOUNDARY *bc, int only_count)
{
  int i, j, nlist, ires, jres;
  int grow;
  double dx, dy, dz, cutoff2;
  int res_to_res_start, center_to_center;
  void *grown;

  cutoff2 = nl->rl_list * nl->rl_list;

  /* clear the exclusion marks */
  for (i=0;i<ad->natom;i++) {
    ad->ex[i].id = -1;
  }

  nlist = 0;
  for (ires = 0; ires < ad->nres; ires++) {
    for (jres = ires; jres < ad->nres; jres++) {
      dx = ad->x[ad->r[ires].center_atom].x - ad->x[ad->r[jres].center_atom].x;
      dy = ad->x[ad->r[ires].center_atom].y - ad->x[ad->r[jres].center_atom].y;
      dz = ad->x[ad->r[ires].center_atom].z - ad->x[ad->r[jres].center_atom].z;
      if (bc->type == PERIODIC_BOUNDARY) {
	if (dx > bc->boxh[0]) dx -= bc->box[0];
	if (dy > bc->boxh[1]) dy -= bc->box[1];
	if (dz > bc->boxh[2]) dz -= bc->box[2];
	
	if (dx < -bc->boxh[0]) dx += bc->box[0];
	if (dy < -bc->boxh[1]) dy += bc->box[1];
	if (dz < -bc->boxh[2]) dz += bc->box[2];
      }
      if (Length2(dx, dy, dz) > cutoff2) continue;

      res_to_res_start = nlist;
      center_to_center = -1;
      for (i=ad->r[ires].start_atom; i<=ad->r[ires].end_atom; i++) {
	/* mark every atom excluded from i with i's index */
	for (j=0;j<ad->ex[i].n_exatom;j++) {
	  ad->ex[ad->ex[i].exatom[j]].id = i;
	}
	for (j=ad->r[jres].start_atom; j<=ad->r[jres].end_atom; j++) {
	  /* inside one residue list each pair once */
	  if (ires == jres && i >= j) continue;
	  /* j is excluded from i */
	  if (ad->ex[j].id == i) continue;
	  
	  if (i == ad->r[ires].center_atom &&
	      j == ad->r[jres].center_atom) {
	    center_to_center = nlist;
	  }
	  
	  if (!only_count) {
	    if (nlist >= nl->n_alloc) {
	      /* alloc_unit can be 0 for short lists: grow by >= 1 entry */
	      grow = (nl->alloc_unit > 0) ? nl->alloc_unit : 1;
	      grown = realloc(nl->ij_list,
			      sizeof(int)*2*(nl->n_alloc + grow));
	      if (grown == NULL) {
		lprintf("ERROR: Unable to realloc memory (%d) for nonbond list\n",
			nl->n_alloc + grow);
		/* old buffer is still valid: keep what has been stored */
		nl->n_list = nlist;
		return;
	      }
	      nl->ij_list = grown;
	      nl->n_alloc += grow;
	      printf("NB PAIR: N_LIST %d, ALLOCATED %d\n", nlist, nl->n_alloc);
	    }
	    nl->ij_list[nlist][0] = i;
	    nl->ij_list[nlist][1] = j;
	  }
	  nlist++;
	}
      }
      if (!only_count && center_to_center >= 0) {
	/* swap the center-center pair with the first entry of this
	   residue pair so that the center atoms come first */
	i = nl->ij_list[res_to_res_start][0];
	j = nl->ij_list[res_to_res_start][1];
	nl->ij_list[res_to_res_start][0] = nl->ij_list[center_to_center][0];
	nl->ij_list[res_to_res_start][1] = nl->ij_list[center_to_center][1];
	nl->ij_list[center_to_center][0] = i;
	nl->ij_list[center_to_center][1] = j;
      }
    }
  }
  nl->n_list = nlist;
}

/*
 * Make sure the pair-list buffer can hold nl->n_list entries plus a reserve.
 *
 * The reserve (stored in nl->alloc_unit) is n_list * alloc_ratio truncated
 * to int.  When the current capacity is smaller than n_list + reserve the
 * buffer is allocated (capacity 0) or reallocated, the new capacity is
 * stored in nl->n_alloc, and the sizes are logged.
 */
void alloc_nonbond_list(NONBOND_LIST *nl)
{
  int wanted;

  nl->alloc_unit = (int) (nl->n_list * nl->alloc_ratio);
  wanted = nl->n_list + nl->alloc_unit;

  /* already large enough */
  if (nl->n_alloc >= wanted)
    return;

  if (nl->n_alloc != 0) {
    nl->ij_list = erealloc("alloc_nonbond_list", nl->ij_list, sizeof(int)*2*wanted);
  } else {
    nl->ij_list = emalloc("alloc_nonbond_list", sizeof(int)*2*wanted);
  }
  nl->n_alloc = wanted;

  lprintf("NBPAIR: N_LIST %d, ALLOCATED %d\n", nl->n_list, nl->n_alloc);
}

/*
 * Return 1 when atoms i and j belong in the pair list: j is not in the
 * exclusion list of i and either cutoff2 <= 0.0 or their squared distance
 * (minimum image under PERIODIC_BOUNDARY) is at most cutoff2.
 * Returns 0 otherwise.
 */
static int atom_pair_is_listed(ATOM_DATA *ad, BOUNDARY *bc,
			       int i, int j, double cutoff2)
{
  int k;
  double dx, dy, dz;

  for (k = 0; k < ad->ex[i].n_exatom; k++) {
    if (j == ad->ex[i].exatom[k])
      return 0;
  }

  dx = ad->x[i].x - ad->x[j].x;
  dy = ad->x[i].y - ad->x[j].y;
  dz = ad->x[i].z - ad->x[j].z;
  if (bc->type == PERIODIC_BOUNDARY) {
    if (dx > bc->boxh[0]) dx -= bc->box[0];
    if (dy > bc->boxh[1]) dy -= bc->box[1];
    if (dz > bc->boxh[2]) dz -= bc->box[2];

    if (dx < -bc->boxh[0]) dx += bc->box[0];
    if (dy < -bc->boxh[1]) dy += bc->box[1];
    if (dz < -bc->boxh[2]) dz += bc->box[2];
  }

  return (cutoff2 <= 0.0 || Length2(dx, dy, dz) <= cutoff2) ? 1 : 0;
}

/*
 * Build the nonbond pair list by testing every atom pair i < j.
 *
 * A pair is listed unless j is in the exclusion list of i or the pair is
 * farther apart than nl->rl_list (a non-positive squared list length turns
 * the distance test off).
 *
 * On the first call (nl->n_alloc == 0) the pairs are counted first and the
 * buffer is allocated with a reserve of alloc_ratio * count entries.  Later
 * the buffer is grown with realloc on demand.
 *
 * Changes relative to the previous implementation:
 *  - the initial capacity is at least one entry, so a zero-byte allocation
 *    is never requested;
 *  - the buffer always grows by at least one entry (alloc_unit may be 0
 *    for short lists, which used to lead to a write past the buffer);
 *  - a failed realloc keeps the old buffer; the function then logs the
 *    error, stores the number of pairs written so far in nl->n_list and
 *    returns.
 */
void make_nonbond_list_atom_based(NONBOND_LIST *nl, ATOM_DATA *ad, 
				  BOUNDARY *bc)
{
  int i, j, nlist, grow;
  double cutoff2;
  void *grown;

  cutoff2 = nl->rl_list * nl->rl_list;

  if (nl->n_alloc == 0) {
    /* In the first time, count the number of the nonbond list and
       allocate memory.
    */
    nlist = 0;
    for (i = 0; i < ad->natom-1; i++) {
      for (j = i+1; j < ad->natom; j++) {
	if (atom_pair_is_listed(ad, bc, i, j, cutoff2))
	  nlist++;
      }
    }
    nl->alloc_unit = (int) (nlist * nl->alloc_ratio);
    nl->n_alloc = nlist + nl->alloc_unit;
    if (nl->n_alloc < 1)
      nl->n_alloc = 1;

    nl->ij_list = emalloc("make_nonbond_list",
			  sizeof(int)*2*nl->n_alloc);
    lprintf("NBPAIR: N_LIST %d, ALLOCATED %d\n", nlist, nl->n_alloc);
  }

  nlist = 0;
  for (i = 0; i < ad->natom-1; i++) {
    for (j = i+1; j < ad->natom; j++) {
      if (!atom_pair_is_listed(ad, bc, i, j, cutoff2))
	continue;

      if (nlist >= nl->n_alloc) {
	/* alloc_unit can be 0 for short lists: grow by >= 1 entry */
	grow = (nl->alloc_unit > 0) ? nl->alloc_unit : 1;
	grown = realloc(nl->ij_list, sizeof(int)*2*(nl->n_alloc + grow));
	if (grown == NULL) {
	  lprintf("MEMORY ERROR: Can't realloc memory (%d) for nonbond list\n",nl->n_alloc + grow);
	  /* old buffer is still valid: keep what has been stored */
	  nl->n_list = nlist;
	  return;
	}
	nl->ij_list = grown;
	nl->n_alloc += grow;
	printf("NBPAIR: N_LIST %d, ALLOCATED %d\n", nlist, nl->n_alloc);
      }
      nl->ij_list[nlist][0] = i;
      nl->ij_list[nlist][1] = j;
      nlist++;
    }
  }
  nl->n_list = nlist;
}

/*
 * Dispatch the direct-space nonbond energy/force calculation according to
 * nl->elec_method and nl->vdw_method.
 *
 * The actual kernel is the text of "nonbond_direct.h", included once per
 * supported (NB_VDW, NB_ELEC) combination as the body of the matching
 * if-branch.  The header is not visible from this file:
 * NOTE(review): presumably it expands to a single statement specialised by
 * NB_VDW / NB_ELEC, uses the arguments and `lc` declared here, and #undefs
 * both macros again so that the next #define is not a redefinition --
 * confirm against nonbond_direct.h.
 *
 * Supported combinations:
 *   NE_FSH + NV_PSW,  NE_FSH + NV_FSW,  NE_PSW + NV_PSW
 * NE_PSW + NV_FSW and any other elec_method log an error and call
 * marble_exit(1).  For NE_FSH or NE_PSW with a vdw_method other than those
 * tested, no branch is taken here.
 */
void nonbond_energy_force(NONBOND_LIST *nl, ATOM_DATA *ad, BOUNDARY *bc,
                          double *vdw, double *edir, double *hbond)
{
  /* not referenced in the visible code; presumably used by the included
     kernel -- TODO confirm */
  LINKED_CELL *lc;
  lc = nl->lc;

 
  if (nl->elec_method == NE_FSH) {
    /* NE_FSH electrostatics */
    if (nl->vdw_method == NV_PSW)
#define NB_VDW   NV_PSW
#define NB_ELEC  NE_FSH
#include "nonbond_direct.h"
    else if (nl->vdw_method == NV_FSW)
#define NB_VDW   NV_FSW
#define NB_ELEC  NE_FSH
#include "nonbond_direct.h"
  } else if (nl->elec_method == NE_PSW) {
    /* NE_PSW electrostatics: only combined with NV_PSW */
    if (nl->vdw_method == NV_PSW)
#define NB_VDW   NV_PSW
#define NB_ELEC  NE_PSW
#include "nonbond_direct.h"
    else if (nl->vdw_method == NV_FSW) {
      lprintf("ERROR: both potential_switch for electrostatic cutoff and force_switch for van der Waals cutoff can not be used.\n");
      marble_exit(1);
      
    }
  } else {
    lprintf("ERROR: only support shifted force and switch for electrostatic cutoff.\n");
    marble_exit(1);
  }
}

/*
 * Pairwise nonbond energy and force with a switching function applied to
 * all terms, on both the long-range and the short-range side.
 *
 * For every pair (i,j) of nl->ij_list (the task range start_task..end_task
 * in MPI_RDMD builds) whose squared distance len2 satisfies
 * rs_on2 < len2 < rl_off2, the function evaluates
 *   - a 12-6 term from ad->vdw12/vdw6, or a 12-10 term from ad->hb12/hb10
 *     when the type-pair index is negative, and
 *   - the Coulomb term q_i*q_j/len,
 * multiplies the energies by the switch S and accumulates them in *vdw,
 * *hbond and *elec (all three are zeroed first).
 *
 * Forces are added to ad->f, the diagonal virial components to
 * ad->virial[0..2], and, when ad->atom_ene_sample_flag is set, half of each
 * switched pair energy to the ad->atom_ene entries of both atoms.
 *
 * Under PERIODIC_BOUNDARY the minimum image of each component is used.
 */
void nonbond_energy_force_smooth(NONBOND_LIST *nl, ATOM_DATA *ad, BOUNDARY *bc,
				 double *vdw, double *elec, double *hbond)
{
  int i,j,k;
  int vdw_index;
  double dx, dy, dz;
  double len, len2, len6, len12;
  double vdw12, vdw6, hb12, hb10, force, ene_t, elec_t;
  /* for atom_ene */
  int group_i, group_j;
  double ene_t2, elec_t2;

  /* for smoothing */
  double S, dS, len_rl_on, len_rs_on, rl_tmp, rs_tmp, rl_diff3, rs_diff3;
  
  /* loop-invariant parts of the cubic switch: (3*r_off - r_on) and
     (r_off - r_on)^3, for the long (rl) and the short (rs) window */
  rl_tmp = 3.0 * nl->rl_off - nl->rl_on;
  rs_tmp = 3.0 * nl->rs_off - nl->rs_on;
  rl_diff3 = pow(nl->rl_off - nl->rl_on, 3.0);
  rs_diff3 = pow(nl->rs_off - nl->rs_on, 3.0);
  
  *vdw = *elec = *hbond = 0.0;
#ifdef MPI_RDMD  
  for (k=nl->start_task;k<=nl->end_task;k++) {
#else  /* MPI_RDMD */
  for (k=0;k<nl->n_list;k++) {
#endif /* MPI_RDMD */
    i = nl->ij_list[k][0];
    j = nl->ij_list[k][1];

    dx = ad->x[i].x - ad->x[j].x;
    dy = ad->x[i].y - ad->x[j].y;
    dz = ad->x[i].z - ad->x[j].z;
    
    /* minimum image */
    if (bc->type == PERIODIC_BOUNDARY) {
      if (dx > bc->box[0]*0.5) dx -= bc->box[0];
      if (dy > bc->box[1]*0.5) dy -= bc->box[1];
      if (dz > bc->box[2]*0.5) dz -= bc->box[2];
    
      if (dx < -bc->box[0]*0.5) dx += bc->box[0];
      if (dy < -bc->box[1]*0.5) dy += bc->box[1];
      if (dz < -bc->box[2]*0.5) dz += bc->box[2];
    }

    len2 = dx * dx + dy * dy + dz * dz;
    /* outside the window (rs_on, rl_off): no interaction */
    if (len2 >= nl->rl_off2 || len2 <= nl->rs_on2) continue;

    len6 = len2 * len2 * len2;
    len12 = len6 * len6;
    len = sqrt(len2);

    /** DEBUG **
    for (len = nl->rs_on+0.01; len <= nl->rl_off; len+= 0.01) {
    ** end of DEBUG **/
    
    /* switch S and its derivative dS with respect to len:
         long side,  rl_on < len:  S = 1 - (len-rl_on)^2 (3 rl_off - rl_on - 2 len) / (rl_off-rl_on)^3
         short side, len < rs_off: S =     (len-rs_on)^2 (3 rs_off - rs_on - 2 len) / (rs_off-rs_on)^3
         otherwise S = 1 */
    if (len > nl->rl_on) {
      len_rl_on = len - nl->rl_on;
      S = 1.0-len_rl_on * len_rl_on * (rl_tmp - 2*len) / rl_diff3;
      dS = -6.0 * len_rl_on * (nl->rl_off - len) / rl_diff3;
    } else if (len < nl->rs_off) {
      len_rs_on = len - nl->rs_on;
      S = len_rs_on * len_rs_on * (rs_tmp - 2*len) / rs_diff3;
      dS = 6.0 * len_rs_on * (nl->rs_off - len) / rs_diff3;
    } else {
      S = 1.0; dS = 0.0;
    }
    
    
    /* type-pair lookup: a non-negative index selects the 12-6 tables, a
       negative index n selects entry (-n - 2) of the 12-10 tables */
    vdw_index = ad->index[ad->vdw_type[i]+ad->vdw_type[j]*ad->ntype];
    if (vdw_index >= 0 /* || len2 > ad->hbond_criteria2 */) {
      vdw12 = ad->vdw12[vdw_index] / len12;
      vdw6 = ad->vdw6[vdw_index] / len6;
      /*
      *vdw += vdw12 - vdw6;
      force = 12.0 * vdw12 - 6.0 * vdw6;
      */
      *vdw += (vdw12 - vdw6)*S;
      ene_t = vdw12 - vdw6;
      force = 12.0 * vdw12 - 6.0 * vdw6;
    } else {
      vdw_index = - vdw_index - 2;
      hb12 = ad->hb12[vdw_index] / len12;
      hb10 = ad->hb10[vdw_index] / (len6 * len2 * len2);
      /*
      *hbond += hb12 - hb10;
      force = 12.0 * hb12 - 10.0 * hb10;
      */
      *hbond += (hb12 - hb10)*S;
      ene_t = hb12 - hb10;
      force = 12.0 * hb12 - 10.0 * hb10;
    }
    elec_t = ad->q[i] * ad->q[j] / len;
    /*
     *elec += elec_t;
     force = (force + elec_t) / len2;
    */
    *elec += elec_t*S;
    /* `force` becomes the factor that multiplies (dx,dy,dz): the unswitched
       pair force times S, minus the energy times dS/len from the switch */
#if 1  /* TEST */    
    force = (force + elec_t) / len2 * S - (ene_t + elec_t) * dS / len;
#else
    force = (force + elec_t) / len2 * S;
#endif
    
    /* per-atom energy sampling: each atom gets half of the switched pair
       energy, filed under the other atom's group; the 12-10 energy is
       stored under ATOM_ENE_VDW as well */
    if (ad->atom_ene_sample_flag) {
      ene_t2 = ene_t*0.5*S;
      elec_t2 = elec_t*0.5*S;
      group_i = ad->atom_ene_group[i];
      group_j = ad->atom_ene_group[j];
      ad->atom_ene[i][group_j][ATOM_ENE_VDW] += ene_t2;
      ad->atom_ene[j][group_i][ATOM_ENE_VDW] += ene_t2;
      ad->atom_ene[i][group_j][ATOM_ENE_ELEC] += elec_t2;
      ad->atom_ene[j][group_i][ATOM_ENE_ELEC] += elec_t2;
    }
    
    ad->f[i].x += force * dx;
    ad->f[i].y += force * dy;
    ad->f[i].z += force * dz;
    
    ad->f[j].x -= force * dx;
    ad->f[j].y -= force * dy;
    ad->f[j].z -= force * dz;
      
    /** DEBUG **
    if (ad->q[i] != 0.0 && ad->q[j] != 0.0 && vdw12 != 0.0) 
      lprintf("%lf %lf %lf\n", len, (elec_t+ene_t)*S, force / len);
    }
    if (ad->q[i] != 0.0 && ad->q[j] != 0.0 && vdw12 != 0.0) 
      exit(1);
    ** endof DEBUG **/

    /* virial */
    ad->virial[0] += force * dx * dx;
    ad->virial[1] += force * dy * dy;
    ad->virial[2] += force * dz * dz;
  }
}

/*
 * Pairwise nonbond energy and force where only the van der Waals term is
 * cut off; the Coulomb term is evaluated for every listed pair without
 * switching.  No minimum-image correction is applied (bc is not used).
 *
 * For every pair (i,j) of nl->ij_list (the task range start_task..end_task
 * in MPI_RDMD builds):
 *   - if len <= rl_off the 12-6 energy is multiplied by the switch S
 *     (S = 1 up to rl_on, cubic switch between rl_on and rl_off) and added
 *     to *vdw;
 *   - q_i*q_j/len is added to *elec.
 * *vdw, *elec and *hbond are zeroed first.  Forces are added to ad->f, the
 * diagonal virial to ad->virial[0..2], and, when ad->atom_ene_sample_flag
 * is set, half of each pair energy to the ad->atom_ene entries of both
 * atoms.
 *
 * Fix: S, dS, ene_t and force now get defined values (0.0) for every pair
 * before the cutoff test.  Previously S was left unset for pairs beyond
 * rl_off and was then read by the per-atom energy sampling, and ene_t was
 * left unset in the HBOND branch.
 */
void nonbond_energy_force_all(NONBOND_LIST *nl, ATOM_DATA *ad, BOUNDARY *bc,
			      double *vdw, double *elec, double *hbond)
{
  int i,j,k;
  int vdw_index;
  double dx, dy, dz;
  double len, len2, len6, len12;
  double vdw12, vdw6, hb12, hb10, force, ene_t, elec_t;
  /* for smoothing */
  double S, dS, len_rl_on, rl_tmp, rl_diff3;
  /* for atom_ene */
  int group_i, group_j;
  double ene_t2, elec_t2;

  *vdw = *elec = *hbond = 0.0;
  
  /* loop-invariant parts of the cubic switch */
  rl_tmp = 3.0 * nl->rl_off - nl->rl_on;
  rl_diff3 = pow(nl->rl_off - nl->rl_on, 3.0);
  
#ifdef MPI_RDMD  
  for (k=nl->start_task;k<=nl->end_task;k++) {
#else  /* MPI_RDMD */
  for (k=0;k<nl->n_list;k++) {
#endif /* MPI_RDMD */
    i = nl->ij_list[k][0];
    j = nl->ij_list[k][1];

    dx = ad->x[i].x - ad->x[j].x;
    dy = ad->x[i].y - ad->x[j].y;
    dz = ad->x[i].z - ad->x[j].z;

    len2 = dx * dx + dy * dy + dz * dz;
    len = sqrt(len2);

    /* defaults for pairs beyond the cutoff: no vdW contribution */
    S = 0.0;
    dS = 0.0;
    ene_t = 0.0;
    force = 0.0;

    if (len <= nl->rl_off) {
      if (len > nl->rl_on) {
	len_rl_on = len - nl->rl_on;
	S = 1.0-len_rl_on * len_rl_on * (rl_tmp - 2*len) / rl_diff3;
	dS = -6.0 * len_rl_on * (nl->rl_off - len) / rl_diff3;
      } else {
	S = 1.0; dS = 0.0;
      }
      
      len6 = len2 * len2 * len2;
      len12 = len6 * len6;
    
      vdw_index = ad->index[ad->vdw_type[i]+ad->vdw_type[j]*ad->ntype];
#ifdef HBOND    
      if (vdw_index >= 0 /* || len2 > ad->hbond_criteria2 */) {
#endif      
	vdw12 = ad->vdw12[vdw_index] / len12;
	vdw6 = ad->vdw6[vdw_index] / len6;
	ene_t = vdw12 - vdw6;
	*vdw += ene_t*S;
	/* unswitched pair force times S, minus energy times dS/len */
	force = (12.0 * vdw12 - 6.0 * vdw6)/len2 * S - ene_t * dS /len;
#ifdef HBOND    
      } else {
	/* NOTE(review): unlike the 12-6 branch, this branch neither applies
	   S nor divides the force by len2 -- confirm before enabling HBOND */
	vdw_index = - vdw_index - 2;
	hb12 = ad->hb12[vdw_index] / len12;
	hb10 = ad->hb10[vdw_index] / (len6 * len2 * len2);
	*hbond += hb12 - hb10;
	force = 12.0 * hb12 - 10.0 * hb10;
      }
#endif
    }

    /* Coulomb term: always evaluated, never switched */
    elec_t = ad->q[i] * ad->q[j] / len;
    *elec += elec_t;
    force += elec_t / len2;
    
    if (ad->atom_ene_sample_flag) {
      ene_t2  = ene_t*0.5*S;
      elec_t2 = elec_t*0.5;
      group_i = ad->atom_ene_group[i];
      group_j = ad->atom_ene_group[j];
      ad->atom_ene[i][group_j][ATOM_ENE_VDW] += ene_t2;
      ad->atom_ene[j][group_i][ATOM_ENE_VDW] += ene_t2;
      ad->atom_ene[i][group_j][ATOM_ENE_ELEC] += elec_t2;
      ad->atom_ene[j][group_i][ATOM_ENE_ELEC] += elec_t2;
    }
    
    ad->f[i].x += force * dx;
    ad->f[i].y += force * dy;
    ad->f[i].z += force * dz;
    
    ad->f[j].x -= force * dx;
    ad->f[j].y -= force * dy;
    ad->f[j].z -= force * dz;

    /* virial */
    ad->virial[0] += force * dx * dx;
    ad->virial[1] += force * dy * dy;
    ad->virial[2] += force * dz * dz;
  }
}

  
#if 1
/*
 * Pairwise nonbond energy and force for a residue-based pair list.
 *
 * The list is processed in runs of consecutive entries that share the same
 * (residue of i, residue of j).  The first entry of a run decides for the
 * whole run:
 *   - the periodic image offset (PERIODIC_BOUNDARY only),
 *   - whether the run is skipped (squared distance of that first pair
 *     outside the window rs_on2 < len2 < rl_off2),
 *   - the switch value S and its derivative dS, taken at the distance of
 *     that first pair (S = 1 up to rl_on, cubic switch up to rl_off).
 * make_nonbond_list_residue_based() moves the center-atom pair to the front
 * of each run when that pair is listed.
 *
 * Every pair of a run adds its 12-6 (or 12-10, for a negative type-pair
 * index) and Coulomb energies, multiplied by S, to *vdw / *hbond / *elec
 * (all zeroed first).  The pair force scaled by S acts on i and j; the
 * switch-derivative part, -(energy)*dS/c_len, acts on the two atoms of the
 * run's first pair along their separation.  Forces go to ad->f, diagonal
 * virial components to ad->virial[0..2].
 *
 * Fixes:
 *  - the per-atom electrostatic energy now carries the factor S, like the
 *    total *elec and like the vdW part, so the sampled per-atom energies
 *    add up to the totals;
 *  - `skip` starts out defined.
 * The unused short-range switch quantities were removed.
 */
void nonbond_energy_force_residue_based(NONBOND_LIST *nl, ATOM_DATA *ad,
					BOUNDARY *bc,
					double *vdw, double *elec, double *hbond)
{
  int i,j,k;
  int vdw_index;
  double dx, dy, dz;
  double c_dx, c_dy, c_dz, c_force, c_len;
  int c_i, c_j, cur_ires, cur_jres, ires, jres, skip;
  double len, len2, len6, len12;
  double vdw12, vdw6, hb12, hb10, force, ene_t, elec_t;
  VEC offset;
  
  /* for smoothing */
  double S, dS, len_rl_on, rl_tmp, rl_diff3;
  /* for atom_ene */
  int group_i, group_j;
  double ene_t2, elec_t2;
  
  /* loop-invariant parts of the cubic switch */
  rl_tmp = 3.0 * nl->rl_off - nl->rl_on;
  rl_diff3 = pow(nl->rl_off - nl->rl_on, 3.0);

  *vdw = *elec = *hbond = 0.0;
  cur_ires = cur_jres = -1;
  skip = 1;   /* nothing is processed before a run has been classified */
#ifdef MPI_RDMD
  for (k=nl->start_task;k<=nl->end_task;k++) {
#else  /* MPI_RDMD */
  for (k=0;k<nl->n_list;k++) {
#endif /* MPI_RDMD */
    i = nl->ij_list[k][0];
    j = nl->ij_list[k][1];

    dx = ad->x[i].x - ad->x[j].x;
    dy = ad->x[i].y - ad->x[j].y;
    dz = ad->x[i].z - ad->x[j].z;

    ires = ad->a[i].resno;
    jres = ad->a[j].resno;
    
    if (ires != cur_ires || jres != cur_jres) {
      /* first pair of a new residue-pair run */
      cur_ires = ires;
      cur_jres = jres;
      c_i = i;
      c_j = j;
      if (bc->type == PERIODIC_BOUNDARY) {
	/* image offset of this pair, reused for the rest of the run */
	offset.x = offset.y = offset.z = 0.0;
	if (dx > bc->box[0]*0.5) offset.x = - bc->box[0];
	if (dy > bc->box[1]*0.5) offset.y = - bc->box[1];
	if (dz > bc->box[2]*0.5) offset.z = - bc->box[2];
    
	if (dx < -bc->box[0]*0.5) offset.x = bc->box[0];
	if (dy < -bc->box[1]*0.5) offset.y = bc->box[1];
	if (dz < -bc->box[2]*0.5) offset.z = bc->box[2];
	
	dx += offset.x;
	dy += offset.y;
	dz += offset.z;
      }
    
      len2 = dx * dx + dy * dy + dz * dz;
      if (len2 >= nl->rl_off2 || len2 <= nl->rs_on2) {
	/* the whole run is outside the window */
	skip = 1;
	continue;
      } else {
	skip = 0;
      }
      len = sqrt(len2);
      c_dx = dx;
      c_dy = dy;
      c_dz = dz;
      c_len = len;

      /* switch for the whole run, from the first pair's distance */
      if (len > nl->rl_on) {
	len_rl_on = len - nl->rl_on;
	S = 1.0-len_rl_on * len_rl_on * (rl_tmp - 2*len) / rl_diff3;
	dS = -6.0 * len_rl_on * (nl->rl_off - len) / rl_diff3;
      } else {
	S  = 1.0;
	dS = 0.0;
      }
    } else {
      if (skip) continue;
      if (bc->type == PERIODIC_BOUNDARY) {
	dx += offset.x;
	dy += offset.y;
	dz += offset.z;
      }
      len2 = dx * dx + dy * dy + dz * dz;
      len = sqrt(len2);
    }
    
    len6 = len2 * len2 * len2;
    len12 = len6 * len6;
    
    /* type-pair lookup: a non-negative index selects the 12-6 tables, a
       negative index n selects entry (-n - 2) of the 12-10 tables */
    vdw_index = ad->index[ad->vdw_type[i]+ad->vdw_type[j]*ad->ntype];
    if (vdw_index >= 0 /* || len2 > ad->hbond_criteria2 */) {
      vdw12 = ad->vdw12[vdw_index] / len12;
      vdw6 = ad->vdw6[vdw_index] / len6;
      ene_t = vdw12 - vdw6;
      
      if (ene_t > 100.0) {
	lprintf("i, j = %d,%d, %f, %f\n", i,j, ene_t, S);
      }
      
      *vdw += ene_t * S;
      force = 12.0 * vdw12 - 6.0 * vdw6;
    } else {
      vdw_index = - vdw_index - 2;
      hb12 = ad->hb12[vdw_index] / len12;
      hb10 = ad->hb10[vdw_index] / (len6 * len2 * len2);
      ene_t = hb12 - hb10;
      *hbond += ene_t * S;
      force = 12.0 * hb12 - 10.0 * hb10;
    }
    elec_t = ad->q[i] * ad->q[j] / len;

    *elec += elec_t * S;
    
    /* pair part acts on i,j; switch-derivative part on the run's first pair */
    force = (force + elec_t) / len2 * S;
    c_force = - (ene_t + elec_t) * dS / c_len;
    
    if (ad->atom_ene_sample_flag) {
      /* half of the switched pair energy for each atom */
      ene_t2  = ene_t*0.5*S;
      elec_t2 = elec_t*0.5*S;
      group_i = ad->atom_ene_group[i];
      group_j = ad->atom_ene_group[j];
      ad->atom_ene[i][group_j][ATOM_ENE_VDW] += ene_t2;
      ad->atom_ene[j][group_i][ATOM_ENE_VDW] += ene_t2;
      ad->atom_ene[i][group_j][ATOM_ENE_ELEC] += elec_t2;
      ad->atom_ene[j][group_i][ATOM_ENE_ELEC] += elec_t2;
    }
    
    ad->f[i].x += force * dx;
    ad->f[i].y += force * dy;
    ad->f[i].z += force * dz;
    
    ad->f[j].x -= force * dx;
    ad->f[j].y -= force * dy;
    ad->f[j].z -= force * dz;
    /* virial */
    ad->virial[0] += force * dx * dx;
    ad->virial[1] += force * dy * dy;
    ad->virial[2] += force * dz * dz;

    /* for center atom */
    ad->f[c_i].x += c_force * c_dx;
    ad->f[c_i].y += c_force * c_dy;
    ad->f[c_i].z += c_force * c_dz;
    ad->f[c_j].x -= c_force * c_dx;
    ad->f[c_j].y -= c_force * c_dy;
    ad->f[c_j].z -= c_force * c_dz;
    ad->virial[0] += c_force * c_dx * c_dx;
    ad->virial[1] += c_force * c_dy * c_dy;
    ad->virial[2] += c_force * c_dz * c_dz;
  }
}
#else

/*
 * Alternative, unswitched implementation of
 * nonbond_energy_force_residue_based().  It sits in the #else branch of an
 * "#if 1" and is therefore not compiled.
 *
 * Every listed pair is evaluated with a per-pair minimum image and without
 * any cutoff or switching: 12-6 (or 12-10 for a negative type-pair index)
 * plus Coulomb energy, forces into ad->f, diagonal virial into ad->virial.
 *
 * NOTE(review): the per-atom energy sampling below indexes ad->atom_ene as
 * atom_ene[i].type[...], whereas the compiled functions in this file use
 * atom_ene[i][group][...]; this variant presumably no longer builds --
 * confirm before re-enabling it.
 */
void nonbond_energy_force_residue_based(NONBOND_LIST *nl, ATOM_DATA *ad, BOUNDARY *bc,
					double *vdw, double *elec, double *hbond)
{
  int i,j,k;
  int vdw_index;
  double dx, dy, dz;
  double len, len2, len6, len12;
  double vdw12, vdw6, hb12, hb10, force, ene_t, elec_t;

  *vdw = *elec = *hbond = 0.0;
#ifdef MPI_RDMD
  for (k=nl->start_task;k<=nl->end_task;k++) {
#else  /* MPI_RDMD */
  for (k=0;k<nl->n_list;k++) {
#endif /* MPI_RDMD */
    i = nl->ij_list[k][0];
    j = nl->ij_list[k][1];

    dx = ad->x[i].x - ad->x[j].x;
    dy = ad->x[i].y - ad->x[j].y;
    dz = ad->x[i].z - ad->x[j].z;
    
    /* minimum image */
    if (bc->type == PERIODIC_BOUNDARY) {
      if (dx > bc->box[0]*0.5) dx -= bc->box[0];
      if (dy > bc->box[1]*0.5) dy -= bc->box[1];
      if (dz > bc->box[2]*0.5) dz -= bc->box[2];
    
      if (dx < -bc->box[0]*0.5) dx += bc->box[0];
      if (dy < -bc->box[1]*0.5) dy += bc->box[1];
      if (dz < -bc->box[2]*0.5) dz += bc->box[2];
    }

    len2 = dx * dx + dy * dy + dz * dz;

    len6 = len2 * len2 * len2;
    len12 = len6 * len6;
    len = sqrt(len2);
    
    /* type-pair lookup: a non-negative index selects the 12-6 tables, a
       negative index n selects entry (-n - 2) of the 12-10 tables */
    vdw_index = ad->index[ad->vdw_type[i]+ad->vdw_type[j]*ad->ntype];
    if (vdw_index >= 0 /* || len2 > ad->hbond_criteria2 */) {
      vdw12 = ad->vdw12[vdw_index] / len12;
      vdw6 = ad->vdw6[vdw_index] / len6;
      /*
      *vdw += vdw12 - vdw6;
      force = 12.0 * vdw12 - 6.0 * vdw6;
      */
      ene_t = vdw12 - vdw6;
      *vdw += ene_t;
      force = 12.0 * vdw12 - 6.0 * vdw6;
    } else {
      vdw_index = - vdw_index - 2;
      hb12 = ad->hb12[vdw_index] / len12;
      hb10 = ad->hb10[vdw_index] / (len6 * len2 * len2);
      /*
      *hbond += hb12 - hb10;
      force = 12.0 * hb12 - 10.0 * hb10;
      */
      ene_t = hb12 - hb10;
      *hbond += ene_t;
      force = 12.0 * hb12 - 10.0 * hb10;
    }
    elec_t = ad->q[i] * ad->q[j] / len;
    
    *elec += elec_t;
    
    /* factor that multiplies (dx,dy,dz) to give the pair force */
    force = (force + elec_t) / len2;
    
    /* the full pair energy is added to both atoms here (not halved) */
    if (ad->atom_ene_sample_flag) {
      ad->atom_ene[i].type[ATOM_VDW_ENE]  += ene_t;
      ad->atom_ene[j].type[ATOM_VDW_ENE]  += ene_t;
      ad->atom_ene[i].type[ATOM_ELEC_ENE] += elec_t;
      ad->atom_ene[j].type[ATOM_ELEC_ENE] += elec_t;
    }
    
    ad->f[i].x += force * dx;
    ad->f[i].y += force * dy;
    ad->f[i].z += force * dz;
    
    ad->f[j].x -= force * dx;
    ad->f[j].y -= force * dy;
    ad->f[j].z -= force * dz;
      
    /** DEBUG **
    if (ad->q[i] != 0.0 && ad->q[j] != 0.0 && vdw12 != 0.0) 
      lprintf("%lf %lf %lf\n", len, (elec_t+ene_t)*S, force / len);
    }
    if (ad->q[i] != 0.0 && ad->q[j] != 0.0 && vdw12 != 0.0) 
      exit(1);
    ** endof DEBUG **/

    /* virial */
    ad->virial[0] += force * dx * dx;
    ad->virial[1] += force * dy * dy;
    ad->virial[2] += force * dz * dz;
  }
}
#endif

