From 0a5087d5db750d9a09dd8dd35d7a2a1e986c4359 Mon Sep 17 00:00:00 2001
From: "Olivier J.N. Bertrand" <olivier.bertrand@uni-bielefeld.de>
Date: Thu, 1 Nov 2018 10:42:11 +0100
Subject: [PATCH] Start adding dlt with more than 11 params

---
 .../02-recording-animal-trajectory.ipynb | 116 ++++++++++++-
 navipy/arenatools/cam_dlt.py             | 159 +++++++++++++++---
 navipy/scripts/dlt_calibrator.py         |  26 ++-
 3 files changed, 270 insertions(+), 31 deletions(-)

diff --git a/doc/source/tutorials/02-recording-animal-trajectory.ipynb b/doc/source/tutorials/02-recording-animal-trajectory.ipynb
index 5572030..2b74d09 100644
--- a/doc/source/tutorials/02-recording-animal-trajectory.ipynb
+++ b/doc/source/tutorials/02-recording-animal-trajectory.ipynb
@@ -6,23 +6,125 @@
    "source": [
     "# Recording animal trajectory\n",
     "\n",
-    "## Camera calibration\n",
+    "## Camera recording\n",
     "\n",
-    "A camera is an optical intrument for aquiring images. Cameras are composed of a sensors (converting photon to electrical signal) and a lens (foccussing light rays on the sensors). \\\n",
-    "Many experimental paradigm require the recording of animal motion with one or more camera. \\\n",
-    "The experimenter is more interested by the position of the animal in his or her arena (world \\\n",
+    "A camera is an optical instrument for acquiring images. Cameras are composed of a sensor (converting photons to an electrical signal) and a lens (focusing light rays on the sensor). \n",
+    "Many experimental paradigms require the recording of animal motion with one or more cameras. \n",
+    "The experimenter is more interested in the position of the animal in his or her arena (world \n",
     "coordinate system) than in the position of the animal within the image (camera coordinate system). Therefore, we need to transform the position of the animal in the image to its position in the world. To do so, we need to calibrate our camera. This calibration requires two steps:\n",
     "\n",
     "* remove distortion from the lens (intrinsic calibration)\n",
     "* determine the position and orientation (i.e. pose) of the camera in the environment (extrinsic calibration)\n",
     "\n",
-    "### Intrinsic calibration\n",
+    "### Direct Linear Transformation (DLT)\n",
+    "\n",
+    "When we observe an animal through a camera, we only observe the projection of the animal. For example, the center of mass of the animal in three dimensions is projected in two dimensions on the camera, i.e. in the camera plane, and only these two dimensions are visible from the camera. Mathematically, the point in 3D space is transformed into the camera space. This transformation can be described as follows (a numerical sketch is given after the note below):\n",
+    "\n",
+    "$$\n",
+    "\\begin{align}\n",
+    "\\begin{bmatrix} u-u_0 \\\\ v-v_0 \\\\ -d \\end{bmatrix} &= T \\begin{bmatrix} x-x_0 \\\\ y-y_0 \\\\ z-z_0 \\end{bmatrix} \\\\\n",
+    "&=c \\begin{bmatrix} r_{11} & r_{12} & r_{13} \\\\ r_{21} & r_{22} & r_{23} \\\\ r_{31} & r_{32} & r_{33} \\end{bmatrix}\\begin{bmatrix} x-x_0 \\\\ y-y_0 \\\\ z-z_0 \\end{bmatrix}\n",
+    "\\end{align}\n",
+    "$$\n",
+    "\n",
+    "with:\n",
+    "* $u$, $v$ the position of the animal on the camera\n",
+    "* $x$, $y$, $z$ the position of the animal in the environment\n",
+    "* $u_0$, $v_0$ the center of the camera image (principal point)\n",
+    "* $x_0$, $y_0$, $z_0$ the center of the environment (origin)\n",
+    "* $d$ the principal distance, i.e. the distance between the projection center and the camera plane\n",
+    "* $c$ a collinearity constant\n",
+    "\n",
+    "**Note** $u$, $v$, $u_0$, and $v_0$ have the same units as $x$, $y$, $z$. However, $u$ and $v$ will usually be measured in pixels, and thus we need to further transform $u$ and $v$ by a constant of proportionality: $u-u_0 \\Rightarrow \\lambda_u(u-u_0)$, and $v-v_0 \\Rightarrow \\lambda_v(v-v_0)$.\n",
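To make the transformation concrete, here is a minimal numpy sketch of the collinearity equation above (the function name, camera pose, and numbers are illustrative assumptions, not navipy code):

```python
import numpy as np

def project_point(xyz, rot, xyz0, d, u0=0.0, v0=0.0):
    """Project a 3D point onto the camera plane using
    [u-u0, v-v0, -d]^T = c * R (xyz - xyz0)."""
    p = rot @ (np.asarray(xyz, float) - np.asarray(xyz0, float))
    c = -d / p[2]  # the third row of the equation fixes c
    return u0 + c * p[0], v0 + c * p[1]

# Example: camera axes aligned with the world, point one unit in front
rot = np.eye(3)
print(project_point([0.1, 0.2, -1.0], rot, xyz0=[0, 0, 0], d=0.05))
# -> (0.005, 0.01): the world offsets scaled by d/|z|
```

The third row of the equation determines $c = -d/p_z$; substituting it back into the first two rows gives the perspective division used in the sketch.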
+    "\n",
+    "#### Calibration\n",
+    "\n",
+    "We ultimately want the position of the animal within its environment, seen from two or more cameras. To be able to reconstruct (triangulate) the animal position, we first need a method to describe the transformation from camera to world and vice versa, i.e. to obtain the parameters of the transformation from known points. This process is called camera calibration.\n",
+    "\n",
+    "We need to express $u$ and $v$ as functions of $x$, $y$, and $z$:\n",
+    "\n",
+    "$$\n",
+    "u = \\frac{L_1x+L_2y+L_3z+L_4}{L_9x+L_{10}y+L_{11}z+1}\\text{, and }\n",
+    "v = \\frac{L_5x+L_6y+L_7z+L_8}{L_9x+L_{10}y+L_{11}z+1}\n",
+    "$$\n",
+    "\n",
+    "The coefficients $L_1$ to $L_{11}$ are the DLT parameters that reflect the relationship between the environment frame of our animal and the image frame.\n",
+    "\n",
+    "The system of equations above can be rewritten as a product of matrices:\n",
+    "$$\n",
+    "\\begin{bmatrix} u \\\\ v \\end{bmatrix} = \\begin{bmatrix} x & y & z & 1 & 0 & 0 & 0 & 0 & -ux & -uy & -uz \\\\ 0 & 0 & 0 & 0 & x & y & z & 1 & -vx & -vy & -vz \\end{bmatrix}\\begin{bmatrix} L_1 \\\\ L_2 \\\\ \\vdots \\\\ L_{10} \\\\ L_{11} \\end{bmatrix}\n",
+    "$$\n",
+    "\n",
+    "We can expand these equations for $n$ points visible on the camera:\n",
+    "\n",
+    "$$\n",
+    "\\begin{bmatrix} \n",
+    "u_1 \\\\ v_1 \\\\\n",
+    "u_2 \\\\ v_2 \\\\\n",
+    "\\vdots \\\\\n",
+    "u_{n-1} \\\\ v_{n-1} \\\\\n",
+    "u_n \\\\ v_n \\\\\n",
+    "\\end{bmatrix} = \n",
+    "\\begin{bmatrix} \n",
+    "x_1 & y_1 & z_1 & 1 & 0 & 0 & 0 & 0 & -u_1x_1 & -u_1y_1 & -u_1z_1 \\\\ \n",
+    "0 & 0 & 0 & 0 & x_1 & y_1 & z_1 & 1 & -v_1x_1 & -v_1y_1 & -v_1z_1 \\\\\n",
+    "x_2 & y_2 & z_2 & 1 & 0 & 0 & 0 & 0 & -u_2x_2 & -u_2y_2 & -u_2z_2 \\\\ \n",
+    "0 & 0 & 0 & 0 & x_2 & y_2 & z_2 & 1 & -v_2x_2 & -v_2y_2 & -v_2z_2 \\\\\n",
+    "\\vdots \\\\\n",
+    "x_{n-1} & y_{n-1} & z_{n-1} & 1 & 0 & 0 & 0 & 0 & -u_{n-1}x_{n-1} & -u_{n-1}y_{n-1} & -u_{n-1}z_{n-1} \\\\ \n",
+    "0 & 0 & 0 & 0 & x_{n-1} & y_{n-1} & z_{n-1} & 1 & -v_{n-1}x_{n-1} & -v_{n-1}y_{n-1} & -v_{n-1}z_{n-1} \\\\\n",
+    "x_n & y_n & z_n & 1 & 0 & 0 & 0 & 0 & -u_nx_n & -u_ny_n & -u_nz_n \\\\ \n",
+    "0 & 0 & 0 & 0 & x_n & y_n & z_n & 1 & -v_nx_n & -v_ny_n & -v_nz_n \\end{bmatrix}\n",
+    "\\begin{bmatrix} L_1 \\\\ L_2 \\\\ \\vdots \\\\ L_{10} \\\\ L_{11} \\end{bmatrix}\n",
+    "$$\n",
+    "\n",
+    "With 11 equations or more we can determine the DLT parameters from this system. Since each point in the environment leads to two equations (one for $u$, one for $v$), we need at least 6 points to obtain the 11 DLT parameters; with more points the system is solved in the least-squares sense (see the sketch below).\n",
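As a sketch of how this least-squares problem can be solved directly with numpy (the helper name and the synthetic data are assumptions for illustration; navipy's own implementation appears later in this patch):

```python
import numpy as np

def dlt11_calibrate(xyz, uv):
    """Solve the 11-parameter DLT from n >= 6 point correspondences.
    xyz: (n, 3) world coordinates; uv: (n, 2) image coordinates."""
    n = xyz.shape[0]
    A = np.zeros((2*n, 11))
    b = uv.reshape(-1)  # [u_1, v_1, ..., u_n, v_n]
    for i, ((x, y, z), (u, v)) in enumerate(zip(xyz, uv)):
        A[2*i] = [x, y, z, 1, 0, 0, 0, 0, -u*x, -u*y, -u*z]
        A[2*i+1] = [0, 0, 0, 0, x, y, z, 1, -v*x, -v*y, -v*z]
    return np.linalg.lstsq(A, b, rcond=None)[0]

# Synthetic check: project 8 points with known parameters, then recover them
rng = np.random.default_rng(0)
L_true = np.concatenate([rng.normal(size=8), rng.uniform(-0.1, 0.1, size=3)])
xyz = rng.uniform(-1, 1, size=(8, 3))
den = xyz @ L_true[8:11] + 1
uv = np.column_stack([(xyz @ L_true[0:3] + L_true[3]) / den,
                      (xyz @ L_true[4:7] + L_true[7]) / den])
print(np.allclose(dlt11_calibrate(xyz, uv), L_true))  # True for exact points
```

Each row of the design matrix is one row of the matrix product above, so a single `lstsq` call recovers all 11 parameters at once.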
+    "\n",
+    "#### Reconstruction\n",
+    "\n",
+    "Once two or more cameras have been calibrated, the animal position can be reconstructed (triangulated) from its pixel coordinates in the cameras. Rearranging the projection equations of camera $k$ gives two equations that are linear in $x$, $y$, $z$:\n",
+    "\n",
+    "$$\n",
+    "\\begin{bmatrix} u^kL^k_9-L^k_1 & u^kL^k_{10}-L^k_2 & u^kL^k_{11}-L^k_3 \\\\ v^kL^k_9-L^k_5 & v^kL^k_{10}-L^k_6 & v^kL^k_{11}-L^k_7 \\end{bmatrix}\\begin{bmatrix} x \\\\ y \\\\ z \\end{bmatrix} = \\begin{bmatrix} L^k_4-u^k \\\\ L^k_8-v^k \\end{bmatrix}\n",
+    "$$\n",
+    "\n",
+    "Stacking these equations for all cameras yields an overdetermined linear system that can be solved for $x$, $y$, $z$ by least squares.\n",
+    "\n",
+    "#### Exercises\n",
+    "* Derive $L_1$ to $L_{11}$ from $r_{11}$ to $r_{33}$, $d$, and $c$\n",
+    "\n",
+    "#### 16 DLT parameters\n",
+    "The DLT method exposed above corrects neither for optical (radial) distortion nor for de-centering distortion. Both can be modelled with five additional parameters $L_{12}$ to $L_{16}$ (a sketch of the resulting correction is given below):\n",
+    "\n",
+    "$$\n",
+    "\\frac{1}{R}\\begin{bmatrix} u \\\\ v \\end{bmatrix} = \n",
+    "\\frac{1}{R}\\begin{bmatrix} \n",
+    "x & y & z & 1 & 0 & 0 & 0 & 0 & -ux & -uy & -uz & \\zeta r^2R & \\zeta r^4R & \\zeta r^6R & (r^2+2\\zeta^2)R & \\zeta\\eta R \\\\ \n",
+    "0 & 0 & 0 & 0 & x & y & z & 1 & -vx & -vy & -vz & \\eta r^2R & \\eta r^4R & \\eta r^6R & \\zeta\\eta R & (r^2+2\\eta^2)R \\end{bmatrix}\\begin{bmatrix} L_1 \\\\ L_2 \\\\ \\vdots \\\\ L_{15} \\\\ L_{16} \\end{bmatrix}\n",
+    "$$\n",
+    "\n",
+    "with \n",
+    "* $R=L_9x + L_{10}y + L_{11}z +1$, \n",
+    "* $r^2 = \\eta^2 + \\zeta^2$, \n",
+    "* $\\zeta = u-u_0$, and \n",
+    "* $\\eta = v-v_0$ "
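The correction these extra parameters apply to ideal image coordinates can be sketched as follows (a hypothetical helper; the parameter names mirror $L_{12}$ to $L_{16}$ above and are not part of navipy):

```python
import numpy as np

def distort(u, v, u0, v0, L12, L13, L14, L15, L16):
    """Apply the radial (L12-L14) and de-centering (L15, L16) terms of
    the 16-parameter model to ideal image coordinates (u, v)."""
    zeta, eta = u - u0, v - v0
    rsq = zeta**2 + eta**2
    radial = L12*rsq + L13*rsq**2 + L14*rsq**3
    du = zeta*radial + L15*(rsq + 2*zeta**2) + L16*zeta*eta
    dv = eta*radial + L15*zeta*eta + L16*(rsq + 2*eta**2)
    return u + du, v + dv

# A negative L12 pulls points towards the principal point (barrel distortion)
print(distort(110.0, 100.0, u0=100.0, v0=100.0,
              L12=-1e-4, L13=0.0, L14=0.0, L15=0.0, L16=0.0))
# -> (109.9, 100.0)
```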
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Alternative method\n",
+    "\n",
+    "Instead of using the DLT, the camera can be calibrated in two separate steps:\n",
+    "\n",
+    "* remove distortion from the lens (intrinsic calibration)\n",
+    "* determine the position and orientation (i.e. pose) of the camera in the environment (extrinsic calibration)\n",
+    "\n",
+    "#### Intrinsic calibration\n",
     "\n",
     "`With Matlab <https://de.mathworks.com/help/vision/ug/single-camera-calibrator-app.html>`_\n",
     "\n",
     "`With Opencv <https://docs.opencv.org/3.3.1/dc/dbb/tutorial_py_calibration.html>`_\n",
     "\n",
-    "### Extrinsic calibration\n",
+    "#### Extrinsic calibration\n",
     "\n",
     "To obtain the pose of the camera (i.e. the extrinsic parameters), we need \n",
     "to find a transformation such that the projection of physical points matches\n",
@@ -33,7 +135,7 @@
     "Using RANSAC decreases the adverse effect that outliers may have on the pose\n",
     "estimation.\n",
     "\n",
-    "#### Building your own manhattan\n",
+    "## Building your own manhattan\n",
     "\n",
     "The manhattan should contain at least 7 towers, and not all towers should\n",
     "share the same plane or line. For best results it is recommended that the\n",
diff --git a/navipy/arenatools/cam_dlt.py b/navipy/arenatools/cam_dlt.py
index 3087749..baa8bd3 100644
--- a/navipy/arenatools/cam_dlt.py
+++ b/navipy/arenatools/cam_dlt.py
@@ -101,6 +101,53 @@ units are [u,v] i.e. camera coordinates or pixels
 
     return xyz, rmse
 
 
+def dlt_principal_point(coeff):
+    """Principal point (u_0, v_0), i.e. where the optical axis
+    intersects the image plane."""
+    normalisation = np.sum(coeff[8:11]**2)
+    u_0 = np.sum(coeff[0:3]*coeff[8:11])/normalisation
+    v_0 = np.sum(coeff[4:7]*coeff[8:11])/normalisation
+    return u_0, v_0
+
+
+def dlt_principal_distance(coeff):
+    """Principal distance derived from the last three DLT parameters."""
+    normalisation = np.sum(coeff[8:11]**2)
+    return 1/np.sqrt(normalisation)
+
+
+def dlt_scale_factors(coeff):
+    """Scale factors along the u and v image axes."""
+    u_0, v_0 = dlt_principal_point(coeff)
+    normalisation = np.sum(coeff[8:11]**2)
+    du = (u_0*coeff[8] - coeff[0])**2
+    du += (u_0*coeff[9] - coeff[1])**2
+    du += (u_0*coeff[10] - coeff[2])**2
+    du /= normalisation
+
+    dv = (v_0*coeff[8] - coeff[4])**2
+    dv += (v_0*coeff[9] - coeff[5])**2
+    dv += (v_0*coeff[10] - coeff[6])**2
+    dv /= normalisation
+    # du and dv were accumulated as squared quantities
+    return np.sqrt(du), np.sqrt(dv)
+
+
+def dlt_transformation_matrix(coeff):
+    """Transformation matrix T between the camera and the
+    environment (see the tutorial)."""
+    u_0, v_0 = dlt_principal_point(coeff)
+    d = dlt_principal_distance(coeff)
+    du, dv = dlt_scale_factors(coeff)
+    transform = np.zeros((3, 3))
+    transform[0, 0] = (u_0*coeff[8]-coeff[0])/du
+    transform[0, 1] = (u_0*coeff[9]-coeff[1])/du
+    transform[0, 2] = (u_0*coeff[10]-coeff[2])/du
+
+    transform[1, 0] = (v_0*coeff[8]-coeff[4])/dv
+    transform[1, 1] = (v_0*coeff[9]-coeff[5])/dv
+    transform[1, 2] = (v_0*coeff[10]-coeff[6])/dv
+
+    transform[2, 0] = coeff[8]
+    transform[2, 1] = coeff[9]
+    transform[2, 2] = coeff[10]
+    return d*transform
+
+
 def dlt_inverse(coeff, frames):
     """
     This function reconstructs the pixel coordinates of a 3D coordinate as
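A minimal sketch of how these helpers fit together once the patch is installed (the coefficient vector below is an arbitrary stand-in for a real result of dlt_compute_coeffs):

```python
import numpy as np
from navipy.arenatools.cam_dlt import (dlt_principal_point,
                                       dlt_principal_distance,
                                       dlt_scale_factors,
                                       dlt_transformation_matrix)

# Arbitrary 11-parameter vector, standing in for a calibration result
coeff = np.random.default_rng(42).normal(size=11)

u0, v0 = dlt_principal_point(coeff)   # optical axis in image coordinates
d = dlt_principal_distance(coeff)
du, dv = dlt_scale_factors(coeff)     # scale along the u and v axes
T = dlt_transformation_matrix(coeff)  # 3x3 camera/environment transform
print(u0, v0, d, du, dv, T, sep='\n')
```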
@@ -123,10 +170,68 @@ def dlt_inverse(coeff, frames):
         coeff[5]+frames[:, 2]*coeff[6]+coeff[7]
     uv[:, 0] /= normalisation
     uv[:, 1] /= normalisation
+    # apply the distortion modelled by the parameters beyond the 11th
+    delta_uv = np.zeros((frames.shape[0], 2))
+    u_0, v_0 = dlt_principal_point(coeff)
+    zeta = uv[:, 0] - u_0
+    eta = uv[:, 1] - v_0
+    rsq = zeta**2 + eta**2
+    if coeff.shape[0] > 11:
+        # 12th parameter: second-order radial distortion
+        delta_uv[:, 0] += zeta*(coeff[11]*rsq)
+        delta_uv[:, 1] += eta*(coeff[11]*rsq)
+    if coeff.shape[0] > 13:
+        # 13th and 14th parameters: higher-order radial distortion
+        delta_uv[:, 0] += zeta*(coeff[12]*(rsq**2) + coeff[13]*(rsq**3))
+        delta_uv[:, 1] += eta*(coeff[12]*(rsq**2) + coeff[13]*(rsq**3))
+    if coeff.shape[0] > 15:
+        # 15th and 16th parameters: de-centering distortion
+        delta_uv[:, 0] += coeff[14]*(rsq + 2*(zeta**2)) + coeff[15]*eta*zeta
+        delta_uv[:, 1] += coeff[15]*(rsq + 2*(eta**2)) + coeff[14]*eta*zeta
+    uv += delta_uv
     return uv
 
 
-def dlt_compute_coeffs(frames, campts):
+def _dlt_matrices_calib(vframes, vcampts, nparams=11, l1to11=np.zeros(11)):
+    # rearrange the frame matrix to facilitate the linear least
+    # squares solution
+    if nparams < 11:
+        nparams = 11
+    matrix = np.zeros((vframes.shape[0]*2, 16))  # 16 for the full dlt
+    if np.allclose(l1to11, 0):
+        # before a first 11-parameter estimate exists, the principal
+        # point is unknown; use the image origin to avoid a 0/0
+        u_0, v_0 = 0, 0
+    else:
+        u_0, v_0 = dlt_principal_point(l1to11)
+    for num_i, index_i in enumerate(vframes.index):
+        # eta, zeta and R depend on L9 to L11
+        zeta = vcampts.loc[index_i, 'u'] - u_0
+        eta = vcampts.loc[index_i, 'v'] - v_0
+        R = np.sum(l1to11[-3:] * vframes.loc[index_i, ['x', 'y', 'z']])+1
+        rsq = eta**2 + zeta**2
+        # populate matrix
+        matrix[2*num_i, 0:3] = vframes.loc[index_i, ['x', 'y', 'z']]
+        matrix[2*num_i+1, 4:7] = vframes.loc[index_i, ['x', 'y', 'z']]
+        matrix[2*num_i, 3] = 1
+        matrix[2*num_i+1, 7] = 1
+        matrix[2*num_i, 8:11] = \
+            vframes.loc[index_i, ['x', 'y', 'z']]*(-vcampts.loc[index_i, 'u'])
+        matrix[2*num_i+1, 8:11] = \
+            vframes.loc[index_i, ['x', 'y', 'z']]*(-vcampts.loc[index_i, 'v'])
+        # 12th parameter
+        matrix[2*num_i, 11] = zeta*rsq*R
+        matrix[2*num_i+1, 11] = eta*rsq*R
+        # 13th and 14th parameters
+        matrix[2*num_i, 12] = zeta*(rsq**2)*R
+        matrix[2*num_i+1, 12] = eta*(rsq**2)*R
+        matrix[2*num_i, 13] = zeta*(rsq**3)*R
+        matrix[2*num_i+1, 13] = eta*(rsq**3)*R
+        # 15th and 16th parameters (columns 14 and 15)
+        matrix[2*num_i, 14] = (rsq + 2*(zeta**2))*R
+        matrix[2*num_i+1, 14] = eta*zeta*R
+        matrix[2*num_i, 15] = eta*zeta*R
+        matrix[2*num_i+1, 15] = (rsq + 2*(eta**2))*R
+
+        matrix[2*num_i, :] /= R
+        matrix[2*num_i+1, :] /= R
+    return matrix[:, :nparams]
+
+
+def dlt_compute_coeffs(frames, campts, nparams=11, niter=100):
     """
     A basic implementation of 11 parameters DLT
@@ -142,36 +247,44 @@ single plane.
     # remove NaNs
     valid_idx = frames.dropna(how='any').index
     valid_idx = campts.loc[valid_idx, :].dropna(how='any').index
-
     # valid df
     vframes = frames.loc[valid_idx, :]
     vcampts = campts.loc[valid_idx, :]
-
-    # re arange the frame matrix to facilitate the linear least
-    # sqaures solution
-    matrix = np.zeros((vframes.shape[0]*2, 11))  # 11 for the dlt
-    for num_i, index_i in enumerate(vframes.index):
-        matrix[2*num_i, 0:3] = vframes.loc[index_i, ['x', 'y', 'z']]
-        matrix[2*num_i+1, 4:7] = vframes.loc[index_i, ['x', 'y', 'z']]
-        matrix[2*num_i, 3] = 1
-        matrix[2*num_i+1, 7] = 1
-        matrix[2*num_i, 8:11] = \
-            vframes.loc[index_i, ['x', 'y', 'z']]*(-vcampts.loc[index_i, 'u'])
-        matrix[2*num_i+1, 8:11] = \
-            vframes.loc[index_i, ['x', 'y', 'z']]*(-vcampts.loc[index_i, 'v'])
-
-    # re argen the campts array for the linear solution
-    vcampts_f = np.reshape(np.flipud(np.rot90(vcampts)), vcampts.size, 1)
-    print(vcampts_f.shape)
-    print(matrix.shape)
+    # get the matrices for the calibration
+    matrix = _dlt_matrices_calib(vframes, vcampts)
+    vcampts_f = vcampts.values.flatten()  # [u_1, v_1, ..., u_n, v_n]
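A synthetic round trip through the calibration entry point can serve as a quick check (the correspondences below are made up; in practice they come from digitised calibration points such as the manhattan towers, and this assumes the patch behaves as intended):

```python
import numpy as np
import pandas as pd
from navipy.arenatools.cam_dlt import dlt_compute_coeffs, dlt_inverse

# Made-up correspondences generated from a known parameter vector
rng = np.random.default_rng(7)
frames = pd.DataFrame(rng.uniform(0, 1, size=(8, 3)),
                      columns=['x', 'y', 'z'])
L = np.array([100., 0., 0., 50., 0., 100., 0., 50., 0.01, 0.01, 0.01])
den = frames.values @ L[8:11] + 1
campts = pd.DataFrame({'u': (frames.values @ L[0:3] + L[3]) / den,
                       'v': (frames.values @ L[4:7] + L[7]) / den})

coeff, rmse = dlt_compute_coeffs(frames, campts, nparams=11)
print(rmse)  # should be close to zero for noise-free points
print(dlt_inverse(coeff, frames[['x', 'y', 'z']].values))  # reprojection
```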
     # get the linear solution for the 11 parameters
     coeff = np.linalg.lstsq(matrix, vcampts_f, rcond=None)[0]
     # compute the position of the frame in u,v coordinates given the linear
     # solution from the previous line
     matrix_uv = dlt_inverse(coeff, vframes)
     # compute the rmse between the ideal frame u,v and the
     # recorded frame u,v
     rmse = np.sqrt(np.mean(np.sum((matrix_uv-vcampts)**2)))
-    return coeff, rmse
+    if nparams == 11:
+        return coeff, rmse
+    # refine iteratively with the additional distortion parameters
+    if nparams in [12, 14, 16]:
+        for _ in range(niter):
+            # the 9th to 11th parameters are at index 8 to 10
+            l1to11 = coeff[:11]
+            # get the matrices for the calibration
+            matrix = _dlt_matrices_calib(vframes, vcampts,
+                                         nparams=nparams, l1to11=l1to11)
+            # normalise the camera points by R (see the tutorial)
+            vcampts_normed = vcampts_f.copy()
+            for num_i, index_i in enumerate(vframes.index):
+                normalisation = np.sum(
+                    l1to11[-3:] * vframes.loc[index_i, ['x', 'y', 'z']])+1
+                vcampts_normed[2*num_i] /= normalisation
+                vcampts_normed[2*num_i + 1] /= normalisation
+            coeff = np.linalg.lstsq(matrix, vcampts_normed, rcond=None)[0]
+        # compute the position of the frame in u,v coordinates given
+        # the refined solution
+        matrix_uv = dlt_inverse(coeff, vframes)
+        # compute the rmse between the ideal frame u,v and the
+        # recorded frame u,v
+        rmse = np.sqrt(np.mean(np.sum((matrix_uv-vcampts)**2)))
+        return coeff, rmse
+    else:
+        raise ValueError('nparams must be one of [11, 12, 14, 16]')
diff --git a/navipy/scripts/dlt_calibrator.py b/navipy/scripts/dlt_calibrator.py
index ed9b12a..409986b 100644
--- a/navipy/scripts/dlt_calibrator.py
+++ b/navipy/scripts/dlt_calibrator.py
@@ -5,6 +5,10 @@ import cv2
 import os
 from navipy.arenatools.cam_dlt import dlt_inverse
 from navipy.arenatools.cam_dlt import dlt_compute_coeffs
+from navipy.arenatools.cam_dlt import dlt_principal_point
+from navipy.arenatools.cam_dlt import dlt_principal_distance
+from navipy.arenatools.cam_dlt import dlt_scale_factors
+from navipy.arenatools.cam_dlt import dlt_transformation_matrix
 
 keybinding = dict()
 
@@ -42,6 +46,16 @@ def parser_dlt_calibrator():
     parser.add_argument('-s', '--scale',
                         default=0.5,
                         help=arghelp)
+    arghelp = 'number of dlt parameters (11, 12, 14 or 16)'
+    parser.add_argument('-c', '--ndltcoeff',
+                        type=int,
+                        default=11,
+                        help=arghelp)
+    arghelp = 'number of iterations for the calibration'
+    parser.add_argument('-e', '--epoque',
+                        type=int,
+                        default=100,
+                        help=arghelp)
     return parser
 
 
@@ -71,6 +85,8 @@ def main():
     # The image may need to be scaled because the screen may have
     # fewer pixels than the image
     scale = args['scale']
+    ndlt_coeff = args['ndltcoeff']
+    niter = args['epoque']
     imageref = cv2.imread(args['image'])
     # define some constants
     showframe_ref = 50
@@ -144,9 +160,17 @@ def main():
             break
         if key == ord("c"):
             print('calibrate')
-            coeff, rmse = dlt_compute_coeffs(frames, campts)
+            coeff, rmse = dlt_compute_coeffs(
+                frames, campts, nparams=ndlt_coeff,
+                niter=niter)
             print(rmse)
             print(coeff)
+            print('principal point: {}'.format(dlt_principal_point(coeff)))
+            print('principal distance: {}'.format(
+                dlt_principal_distance(coeff)))
+            print('scale factors: {}'.format(dlt_scale_factors(coeff)))
+            print('transform:')
+            print(dlt_transformation_matrix(coeff))
             coeff = pd.Series(data=coeff)
             coeff.to_csv(os.path.splitext(args['points'])[0]+'_coeff.csv')
-- 
GitLab