/* Jason Lawrence Princeton University acls.cc Several variants of the Alternating Constrained Least Squares (ACLS) algorithm described in: Lawrence, J., Ben-Artzi, A., DeCoro, C., Matusik, W., Pfister, H., Ramamoorthi, R., Rusinkiewicz, S. Inverse Shade Trees for Non-Parametric Material Representation and Editing. ACM Transactions on Graphics (Proceedings of ACM SIGGRAPH 2006). This code calls several functions provided by the Numerical Algorithms Group (NAG) library set and Intel's Math Kernel Library. History: 4/2006 - Created (jason) */ #include #include #include #include #include #include #include // NAG library headers #include #include #include // Intel's Math Kernel Library CBLAS headers #include #include "acls.h" #include "timer.h" extern float *H_solidangles; extern bool coltex; void show_brdf (double *H, int th, int td, int pd); // Allocate memory for ACLS workspace object. acls_ws_t * create_acls_ws (int M, int N, int max_k) { acls_ws_t *ws = (acls_ws_t *)malloc(sizeof(acls_ws_t)); if (!ws) return 0; ws->max_k = max_k; ws->Wt = (double *)malloc(sizeof(double)*M*(max_k+1)); ws->Ht = (double *)malloc(sizeof(double)*(N+2)*(max_k+1)); ws->b = (double *)malloc(sizeof(double)*(MAX(M,N)+2)); ws->x = (double *)malloc(sizeof(double)*max_k*(max_k+1)); ws->kx = (long *)malloc(sizeof(long)*(max_k+1)); bzero(ws->kx,sizeof(long)*(max_k+1)); ws->bl = (double *)malloc(sizeof(double)*(max_k+2)); ws->bu = (double *)malloc(sizeof(double)*(max_k+2)); ws->cvec = (double *)malloc(sizeof(double)*(max_k+1)); ws->sums = (double *)malloc(sizeof(double)*max_k); ws->objs = (double *)malloc(sizeof(double)*max_k); ws->inc = (int *)malloc(sizeof(int)*M); ws->WH = (float *)malloc(sizeof(float)*M*N); ws->A = (double *)malloc(sizeof(double)*(max_k+1)); if (!ws->WH) { fprintf(stderr, "Out of memory in create_acls_ws.\n"); exit(1); } return ws; } // Free memory associated with ACLS workspace object. void free_acls_ws(acls_ws_t *ws) { delete [] ws->Wt; delete [] ws->Ht; delete [] ws->b; delete [] ws->x; delete [] ws->kx; delete [] ws->bl; delete [] ws->bu; delete [] ws->cvec; delete [] ws->sums; delete [] ws->objs; delete [] ws->inc; delete [] ws->WH; delete [] ws->A; } // Compute error metric proposed in Equation 4. double spunerrf(float *V, float *B, int M, int N, float *W, float *H, int K, float *WH, float lambda, float mu, int *inc) { cblas_sgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans, M, N, K, 1.0, W, K, H, N, 0.0, WH, N); float *_V=V; float *_B=B; float *_WH=WH; float fit = 0.0; for(int i=0; i *lbound, // General value boundary constraints std::vector *ubound) { static Nag_E04_Opt *options = 0; static NagError *fail = 0; static int nag_init_k = 0; static int nag_init_m = 0; static int nag_init_n = 0; if (options && (K!=nag_init_k || M!=nag_init_m || N!=nag_init_n)) { e04xzc(options,"all",fail); delete options; options = 0; delete fail; fail = 0; } if (!options) { options = new Nag_E04_Opt; fail = new NagError; e04xxc(options); options->prob=Nag_LS1; options->list = false; options->print_level = Nag_NoPrint; options->print_deriv = Nag_D_NoPrint; bzero(fail,sizeof(NagError)); fail->print = FALSE; nag_init_k = K; nag_init_m = M; nag_init_n = N; } double *Ht = ws->Ht; double *b = ws->b; double *x = ws->x; long *kx = ws->kx; double *bl = ws->bl; double *bu = ws->bu; for (int j=0; jobjs[k]),options,NAGCOMM_NULL,fail); options->used = TRUE; } // for (int k=0; kobjs[j] < ws->objs[inc[i]]) inc[i]=j; // Store new mixing weights for (int j=0; joptim_tol=.0001; options->prob=Nag_LS1; options->list = false; options->print_level = Nag_NoPrint; options->print_deriv = Nag_D_NoPrint; // options->max_iter = 1000; bzero(fail,sizeof(NagError)); fail->print = FALSE; nag_init_k = K; nag_init_m = M; nag_init_n = N; } double *Wt = ws->Wt; double *b = ws->b; double *x = ws->x; long *kx = ws->kx; double *bl = ws->bl; double *bu = ws->bu; for (int j=0; jobjf,options,NAGCOMM_NULL,fail); options->used = TRUE; // Store new basis reflectance values for (int j=0; joptim_tol=.0001; options->prob=Nag_LS1; options->list = false; options->print_level = Nag_NoPrint; options->print_deriv = Nag_D_NoPrint; // options->max_iter = 1000; bzero(fail,sizeof(NagError)); fail->print = FALSE; nag_init_k = K; nag_init_m = M; nag_init_n = N; } double *Wt = ws->Wt; double *b = ws->b; double *x = ws->x; long *kx = ws->kx; double *bl = ws->bl; double *bu = ws->bu; for (int j=0; jobjf,options,NAGCOMM_NULL,fail); options->used = TRUE; // Store new basis reflectance values for (int j=0; j *t, // Vector of time stamps std::vector *ssds, // Vector or error stamps std::vector *lbound, // Value bounds for W std::vector *ubound) { if (!ws || K > ws->max_k) { fprintf(stderr, "Invalid K value for workspace in [acls].\n"); exit(1); } int *inc = ws->inc; bzero(inc,sizeof(int)*M); // double old_ssd = sperrf(V,B,M,N,W,H,K,ws->WH,lambda,inc); double old_ssd = spunerrf(V,B,M,N,W,H,K,ws->WH,lambda,mu,inc); printf("ACLS: initial SSD %.6e\n", old_ssd); fflush(stdout); if (t) t->push_back(0.0); if (ssds) ssds->push_back(old_ssd); double tel=0.0; Timer timer; timer.start(); // Iterate for (int it=0;(max_iters==0 || itWH, lambda, mu, inc); printf("\r%d cls_W: %.6e ", it,tssd); printf("\n"); fflush(stdout); // Update H #if 1 // Update routine considers H one column at a time cls_H(V,B,R,M,N,W,H,K,ws,lambda); #else // Update routine considers all H simultaneously (necessary for energy conservation constraints). // Takes much longer! cls_H_full(V,B,M,N,W,H,K); #endif tssd = spunerrf(V, B, M, N, W, H, K, ws->WH, lambda, mu, inc); printf("\r%d cls_H: %.6e ", it,tssd); printf("\n"); fflush(stdout); if (cb) cb(it,W,H,M,N,K); if ( it % 1 == 0 ) { double secs = timer.stop(); tel+=secs; float ssd = spunerrf(V, B, M, N, W, H, K, ws->WH, lambda, mu, inc); float ossd = sperrf(V, B, M, N, W, H, K, ws->WH, 0.0, inc); if (it > 0) { double secs_iter = secs / 1.0; printf("\tsecs per iter ~ %.2f ", secs_iter); } fflush(stdout); if (t) t->push_back(tel); if (ssds) ssds->push_back(ssd); if ( (it > min_iters) && (ssd > old_ssd) ) { fprintf(stderr, "\n\tError increased!"); old_ssd = ssd; } if ( (it > min_iters) && (fabs(1.0-ssd/old_ssd) < .01) ) { fprintf(stdout, "\n\tACLS converged."); old_ssd = ssd; break; } old_ssd = ssd; timer.start(); } } printf("\n"); fflush(stdout); return old_ssd; }