########################################################################### #################### NONLINEAR FIT -- GRADIENT METHOD ################### ########################################################################### # # read data into data frame, from table created with Unix editor # the table has column headers ldh.df<-read.table("ldh.data",header=T) # # list data frame, extract a matrix element ldh.df ldh.df[1,3] # gives an error? as.matrix(ldh.df)[1,3] # # A data frame is _not_ a matrix. (It is a list.) as.list(ldh.df) # # initial guesses for the paramter values, as list # rather than try to figure them out, accept them from on high parameters.ldh<-list(Vmax=1e+00,KmA=1e-03,KmB=1e-02,KmAB=1e-03,C1=1e-03,C2=1e-06) # # View the results. parameters.ldh # # Read the documentation on nls(). ?nls # # Carry out a fit using nls() with the rate law described in eq. (1). ldh.out1<-nls(v0 ~ Vmax/(1 + C1*p + KmA/a + (KmB/b)*(1 + C1*p)*(1 + C2*p) + (KmAB/(a*b))*(1 + C1*p)), ldh.df, start=parameters.ldh, trace=TRUE) # #look at the results as.list(ldh.out1) summary(ldh.out1) residuals(ldh.out1) fitted.values(ldh.out1) ldh.df$v0-fitted.values(ldh.out1) - residuals(ldh.out1) coef(ldh.out1) summary(ldh.out1)$parameters summary(ldh.out1)$sigma sqrt(sum(residuals(ldh.out1)^2)/summary(ldh.out1)$df[2]) # # calculate the covariance matrix - sqrt diagonal element -> std dev vcov(ldh.out1) #(for S following works for var/covar matrix, but not needed with R) #rinv<-backsolve(ldh.out1$R,diag(summary(ldh.out1)$df[1]));rinv %*% t(rinv) # # starting estimates need not be names - could be vector elements nls(v0 ~ C[1]/(1 + C[5]*p + C[2]/a + (C[3]/b)*(1 + C[5]*p)*(1 + C[6]*p) + (C[4]/(a*b))*(1 + C[5]*p)), data=ldh.df,start=list(C=c(1e+00,1e-03,1e-02,1e-03,1e-03,1e-06))) # cleanup # rm(rinv) # not with R ########################################################################### #################### QUALITY OF THE NONLINEAR FIT ####################### ########################################################################### # 1. Are the residuals (deviations between measured and calculated values # = v0 - v0calc) normally distributed? x11() qqnorm(resid(ldh.out1)) qqline(resid(ldh.out1)) # This display also shows outliers - should one do anything? # To see what a quantile plot (or a quantile) is plot((rnorm(1000)),1:1000/1000) plot(sort(rnorm(1000)),1:1000/1000) lines(seq(-4,4,.001),pnorm(seq(-4,4,.001)),type="l") qqnorm(rnorm(1000)) # or #system("gv q*.ps") ########################################################################### #1a. Are the residuals uncorrelated? plot(resid(ldh.out1),type="l") lines(resid(ldh.out1)[sort.list(ldh.df$v)],type="l",lty=2,col=2) lines(resid(ldh.out1)[sort.list(ldh.df$a)],type="l",lty=4,col=4) lines(resid(ldh.out1)[sort.list(ldh.df$b)],type="l",lty=6,col=6) lines(resid(ldh.out1)[sort.list(ldh.df$p)],type="l",lty=8,col=8) #plot(resid(ldh.out1)[sort.list(ldh.df$v)],type="l") #plot(resid(ldh.out1)[sort.list(ldh.df$a)],type="l") #plot(resid(ldh.out1)[sort.list(ldh.df$b)],type="l") #plot(resid(ldh.out1)[sort.list(ldh.df$p)],type="l") ########################################################################### # 2. Is each parameter significant in influencing the fit? # What is the confidence level for each of the parameters being # different from 0? summary(ldh.out1)$parameters summary(ldh.out1)$df 1-pt(2.554559,20) 1-pt(min(summary(ldh.out1)$parameters[,3]),summary(ldh.out1)$df[2]) # Do the t and normal distributions give different confidence levels? 1-pnorm(2.554559) # As an exercise, calculate 67% confidence interval on Vmax. eps<-qt((1 - 67/100)/2,20)*summary(ldh.out1)$parameters[1,2] summary(ldh.out1)$parameters[1,1] + eps summary(ldh.out1)$parameters[1,1] - eps eps/summary(ldh.out1)$parameters[1,2] rm(eps) ########################################################################### # 3. Is the std. error of the fit reasonable from an experimental view? # Compare the std. error of the fit # to the min, max, and mean of the quantity fit. attach(ldh.df) range(v0) mean(v0) summary(ldh.out1)$sigma x<-c(min(v0), mean(v0), max(v0)) cbind(x,summary(ldh.out1)$sigma/x*100) detach(2) rm(x) ########################################################################### # 4. Are the same parameter estimates (the same fit) obtained with # different starting values of the parameters? If not, then the fit is # suspect. # We ignore this for the demo. ########################################################################### # 5. What is the effect of leaving parameters out of the fit? # We ignore this for the demo. ########################################################################### # 6. What is the effect of introducing the C3 parameter into the fit? # We ignore this for the demo. ########################################################################### #################### GRAPHICS DISPLAYS ################################## ########################################################################### # The quality of the data and of the results of the fits can be examined # by suitable graphics displays. Examination of multidimensional data, # i.e., the dependence of a response on more than one variable, requires # special strategies. ########################################################################### # [ xgobi displays of data ..... ] ########################################################################### # [ SGI-specific displays of data ..... ] ########################################################################### # 3. Plots made with R functions. R has a rich plotting language. The # following are relatively unsophisticated examples. # # Start the X11 device driver, # create an object with data and results of fit, # and for convenience attach it. x11() ldh.plot.df<-as.data.frame(cbind(ldh.df,v0calc=(ldh.df$v0 - resid(ldh.out1)))) attach(ldh.plot.df) # For scatterplots pairs( ~ v0 + a + b + p) pairs( ~ I(1/v0calc) + I(1/a) + I(1/b) + p) # For plots of a single variable, first eliminate data # with p>0. nop<-ldh.plot.df[ldh.plot.df$p==0,] # Make a reciprocal plot of v0 for varied [NADH], and add # points for calculated values. plot(1/nop$a,1/nop$v0) points(1/nop$a,1/nop$v0calc,pch="+") plot(1/nop$b,1/nop$v0) points(1/nop$b,1/nop$v0calc,pch="+") # Plot the cumulative distribution function for each # variable, which for a numeric variable is: sort(x) vs # ppoints(x), where ppoints(x) is a vector of # probabilities. plot(ldh.plot.df) plot(nop) # Plot the cumulative distribution function for a single # variable. plot(ppoints(v0), sort(v0)) plot(ppoints(v0-v0calc), sort(v0-v0calc)) # Make a quantile plot of the residuals. qqnorm(v0-v0calc) qqline(v0-v0calc) # Box plots also display the distribution of a variable. boxplot(v0,v0calc) boxplot(v0-v0calc) # R can produce "3-d" plots == perspective plots. All # duplicate values of the independent variables must be # removed. Also, include data only for p=0, to simplify # the surface. nop<-ldh.plot.df[ldh.plot.df$p==0,] nop.uniq <- nop[!duplicated(paste(nop[, "a"], nop[, "b"])),] # Values of z (1/v0) are calculated over a grid of x (a) # and y (b) points. # need library akima to bring in interp functions # [may have to download binary or source code] # need library graphics to bring in persp functions # [may need other packages, if not loaded by default: # methods, stats, utils] library("akima") i<-interp(1/nop.uniq$a, 1/nop.uniq$b, 1/nop.uniq$v0) j<-interp(1/nop.uniq$a, 1/nop.uniq$b, 1/nop.uniq$v0calc) k<-interp(1/nop.uniq$a, 1/nop.uniq$b, 1/nop.uniq$v0 - 1/nop.uniq$v0calc) l<-interp(nop.uniq$a, nop.uniq$b, nop.uniq$v0) m<-interp(nop.uniq$a, nop.uniq$b, nop.uniq$v0calc) n<-interp(nop.uniq$a, nop.uniq$b, nop.uniq$v0 - nop.uniq$v0calc) # Draw perspective plots. persp(i) persp(j) persp(k) persp(l) persp(m) persp(n) # Draw contour plots. contour(i,nlevels=30) contour(j,nlevels=30) contour(k,nlevels=30) contour(l,nlevels=30) contour(m,nlevels=30) contour(n,nlevels=30) # Clean up. detach(2) #repeat until get rid of ldh.... (but not libraries attached) rm(i,j,k,l,m,n) ########################################################################### #################### LINEAR FITS ######################################## ########################################################################### # R has powerful operators to handle linear models. # The LDH rate equation can be thrown into linear form if the denominator # terms with product are deleted, i.e., if measurements are only for p=0. # Make the new data frame. attach(ldh.df) objects(2) ldh.nop.df<-data.frame(v0,a,b)[1:20,] # For reference carry out a nonlinear fit for # ldh.nop.df. ldh.nop.out1<-nls(v0 ~ (Vmax/(1 + KmA/a + KmB/b + KmAB/(a*b))), ldh.nop.df,list("Vmax"=1e+00,"KmA"=1e-03,"KmB"=1e-02,"KmAB"=1e-03)) # And now a linear fit of the reciprocal rate equation # with lm(). lm.out1<-lm(1/v0 ~ I(1/a)*I(1/b),data=ldh.nop.df) # And weight the linear fit by the theoretical v0^4. lm.wt.v04.out1<-lm(1/v0 ~ I(1/a)*I(1/b),data=ldh.nop.df, weights=ldh.nop.df$v0^4) # Compare the weighted linear fit and the nonlinear fit. x<-coef(lm.wt.v04.out1) (coef(ldh.nop.out1) - x/c(x[1]^2,x[1],x[1],x[1]))/coef(ldh.nop.out1) # The differences in parameter estimates range from 0.25 # to 1.3 percent. This is plausible, given the weighting # introduced, which is itself subject to the uncertainty # of the sample. # Compare the unweighted linear fit and the nonlinear # fit. x<-coef(lm.out1) (coef(ldh.nop.out1) - x/c(x[1]^2,x[1],x[1],x[1]))/coef(ldh.nop.out1) # The differences range from 7.6 to 28 percent, as # expected because of the incorrect weighting. It is not # nice to do an unweighted reciprocal plot. # A difficulty with the linear fit to a reciprocal model is that the # estimates of the standard error in the parameters are for (1/Vmax) and # (KmX/Vmax), X={A, B, AB}. Typically one wants uncertainties for Vmax # and KmX, as are obtained with the nonlinear model of the nls() fit. # The uncertainties for Vmax and KmX are calculated as: # var(z) = sum_over_i sum_over_j d(z)/d(P_i)*d(z)/d(P_j)*covar_i_j # Where P_i are the linear model parameters, (1/Vmax) and (KmX/Vmax). # Covar_i_j is the covariance matrix for the linear model. Var(z) is the # variance to be calculated for the parameter Vmax or KmX. z is a # function expressing the parameter Vmax or KmX in terms of the linear # model parameters, (1/Vmax) and (KmX/Vmax). The calculation is # appropriately carried out within a function. lm.to.nls <- function(lmout) { #for linear fit to LDH data #compute values and std.error of Vmax and KmA, etc. #from values and std.errors of 1/Vmax and KmA/Vmax, etc. # #value: data frame with estimate, std.error and t value for each parameter #argument: data frame returned by lm() # std.error<-numeric(4) parameters<-numeric(4) sigmasq<-sum(summary(lmout)$residuals^2)/16 cij<-summary(lmout)$cov*sigmasq # same as vcov(lmout) x<-coef(lmout) parameters[1]<-1/x[1] std.error[1]<-sqrt(1/x[1]^4*cij[1,1]) for (i in 2:4) { parameters[i]<-x[i]/x[1] std.error[i]<-sqrt((1/x[1])^2*cij[i,i] + (-x[i]/(x[1]^2))^2*cij[1,1] + 2*(1/x[1])*(-x[i]/(x[1]^2))*cij[1,i]) } data.frame(parameters = parameters, std.error = std.error, t = parameters/std.error) } # Run this function, lm.to.nls(), on the results of the # weighted linear fit and compare with the results of the # nonlinear fit. x<-lm.to.nls(lm.wt.v04.out1) x summary(ldh.nop.out1)$parameters (summary(ldh.nop.out1)$parameters - x)/x # The ldh.nop.out1 values for Vmax and KmA are close to # the values calculated from lm.wt.v04.out1. The largest # differences are in KmAB: 1.3 percent in the parameter # estimate, 3 percent in the std. error, and 4.5 percent # in t. # Analysis of variance, with the function aov(), can be # used to assess the linear model. summary(aov(1/v0 ~ I(1/a) + I(1/b) + I(1/a):I(1/b),data=ldh.nop.df, weights=ldh.nop.df$v0^4)) # The term with the coefficient (KmAB/Vmax), which is the # least significant factor, enters into the fit at the # .005 confidence level. # The confidence level for the (KmAB/Vmax) parameter # estimated from the t value of the weighted linear fit # is .0025. # When viewing the surface, 1/v0 = f(1/a, 1/b), a slight # twist was seen. We can examine the origin of this # twist. # Carry out a linear fit without the term # (KmAB/Vmax)*(1/a)*(1/b). lm.wt.nokmab.out1<-lm(1/v0 ~ I(1/a) + I(1/b),data=ldh.nop.df, weights=ldh.nop.df$v0^4) # Construct data frames for use in graphics displays. ldh.nop.plot.df<-as.data.frame(cbind(ldh.nop.df,v0calc=(ldh.nop.df$v0 - residuals(ldh.nop.out1)))) lm.nokmab.plot.df<-as.data.frame(cbind(ldh.nop.df, v0calc=fitted(lm.wt.nokmab.out1))) # Display of the results with xgobi shows that the plane # of the calculated values is not twisted for # lm.nokmab.plot.df and is twisted for ldh.nop.plot.df, # showing that the twist comes from a contribution of the # cross product term 1/a * 1/b.