v0.2.0

22944397 · Zhang · 8fbc9bb6 · 22944397 · 22944397 · 22944397
Commit 22944397 authored Mar 20, 2018 by Zhang
20 changed files
--- a/docs/source/_static/img/EncNet32k128d.svg
+++ b/docs/source/_static/img/EncNet32k128d.svg
+<?xml version="1.0" encoding="utf-8"?>
+<!-- Generator: Adobe Illustrator 19.2.1, SVG Export Plug-In . SVG Version: 6.00 Build 0)  -->
+<svg version="1.0" id="Layer_1" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" x="0px" y="0px"
+	 viewBox="0 0 560 420" style="enable-background:new 0 0 560 420;" xml:space="preserve">
+<style type="text/css">
+	.st0{fill:#FFFFFF;}
+	.st1{fill:none;stroke:#262626;stroke-width:0.5;stroke-linecap:square;stroke-linejoin:round;stroke-miterlimit:10;}
+	.st2{fill:#4D4D4F;}
+	.st3{font-family:'Helvetica';}
+	.st4{font-size:10px;}
+	.st5{font-size:11px;}
+	.st6{font-family:'Helvetica-Bold';}
+	.st7{fill:none;stroke:#0D72BA;stroke-width:2;stroke-linejoin:round;stroke-miterlimit:10;}
+	.st8{fill:none;stroke:#D85427;stroke-width:2;stroke-linejoin:round;stroke-miterlimit:10;}
+</style>
+<g>
+	<rect class="st0" width="560" height="420"/>
+	<rect class="st0" width="560" height="420"/>
+	<rect x="73" y="31" class="st0" width="434" height="343"/>
+	<path class="st1" d="M73,374h434 M73,31h434 M73,374v-4.3 M145.3,374v-4.3 M217.7,374v-4.3 M290,374v-4.3 M362.3,374v-4.3
+		 M434.7,374v-4.3 M507,374v-4.3 M73,31v4.3 M145.3,31v4.3 M217.7,31v4.3 M290,31v4.3 M362.3,31v4.3 M434.7,31v4.3 M507,31v4.3"/>
+	<text transform="matrix(1 0 0 1 70 388)" class="st2 st3 st4">0</text>
+	<text transform="matrix(1 0 0 1 136.8359 388)" class="st2 st3 st4">100</text>
+	<text transform="matrix(1 0 0 1 209.1719 388)" class="st2 st3 st4">200</text>
+	<text transform="matrix(1 0 0 1 281.5 388)" class="st2 st3 st4">300</text>
+	<text transform="matrix(1 0 0 1 353.8359 388)" class="st2 st3 st4">400</text>
+	<text transform="matrix(1 0 0 1 426.1641 388)" class="st2 st3 st4">500</text>
+	<text transform="matrix(1 0 0 1 498.5 388)" class="st2 st3 st4">600</text>
+	<text transform="matrix(1 0 0 1 271.5 403)" class="st2 st3 st5">epochs</text>
+	<path class="st1" d="M73,374V31 M507,374V31 M73,374h4.3 M73,335.9h4.3 M73,297.8h4.3 M73,259.7h4.3 M73,221.6h4.3 M73,183.4h4.3
+		 M73,145.3h4.3 M73,107.2h4.3 M73,69.1h4.3 M73,31h4.3 M507,374h-4.3 M507,335.9h-4.3 M507,297.8h-4.3 M507,259.7h-4.3 M507,221.6
+		h-4.3 M507,183.4h-4.3 M507,145.3h-4.3 M507,107.2h-4.3 M507,69.1h-4.3 M507,31h-4.3"/>
+	<text transform="matrix(1 0 0 1 63 377.5)" class="st2 st3 st4">0</text>
+	<text transform="matrix(1 0 0 1 63 339.3906)" class="st2 st3 st4">2</text>
+	<text transform="matrix(1 0 0 1 63 301.2813)" class="st2 st3 st4">4</text>
+	<text transform="matrix(1 0 0 1 63 263.1641)" class="st2 st3 st4">6</text>
+	<text transform="matrix(1 0 0 1 63 225.0625)" class="st2 st3 st4">8</text>
+	<text transform="matrix(1 0 0 1 57 186.9453)" class="st2 st3 st4">10</text>
+	<text transform="matrix(1 0 0 1 57 148.8359)" class="st2 st3 st4">12</text>
+	<text transform="matrix(1 0 0 1 57 110.7188)" class="st2 st3 st4">14</text>
+	<text transform="matrix(1 0 0 1 57 72.6172)" class="st2 st3 st4">16</text>
+	<text transform="matrix(1 0 0 1 57 34.5)" class="st2 st3 st4">18</text>
+	<text transform="matrix(0 -1 1 0 51 217)" class="st2 st3 st5">errors</text>
+	<text transform="matrix(1 0 0 1 246.5 25.25)" class="st6 st5">EncNet-32k128d</text>
+	<polyline class="st7" points="77,31 77.3,42.5 78.1,63.1 78.8,88.1 79.5,90.8 80.2,110.7 81,111.8 81.7,126.5 82.4,133.6 
+		83.1,138.9 83.8,142.3 84.6,149 85.3,149.4 86,157.9 86.7,157 87.5,165.3 88.2,163.4 88.9,169 89.6,172 90.4,169.3 91.1,171.5 
+		91.8,176.1 92.5,172.3 93.3,175.4 94,179.7 94.7,184.3 95.4,180.1 96.1,185.8 96.9,184.1 97.6,184.1 98.3,185.2 99,185.2 
+		99.8,188.3 100.5,188.2 101.2,192.7 101.9,186.9 102.7,188.2 103.4,183.7 104.1,191.8 104.8,189.7 105.5,192.8 106.3,196.1 
+		107,189.2 107.7,195.7 108.4,197.1 109.2,197.8 109.9,194.3 110.6,196.3 111.3,195.5 112.1,198.9 112.8,194.9 113.5,200.6 
+		114.2,196.3 115,197.6 115.7,196.7 116.4,199 117.1,199.6 117.8,203.3 118.6,197.2 119.3,198.5 120,205.1 120.7,204 121.5,200.6 
+		122.2,206.1 122.9,203.6 123.6,207.3 124.4,202.9 125.1,203.7 125.8,201.5 126.5,207.1 127.2,204.7 128,206.9 128.7,207.4 
+		129.4,204.3 130.1,206.2 130.9,200.8 131.6,209.9 132.3,212.8 133,208.9 133.8,207.6 134.5,209.1 135.2,209.6 135.9,205.2 
+		136.7,210.6 137.4,211.6 138.1,209 138.8,207.3 139.5,210.5 140.3,208.3 141,210.4 141.7,208.3 142.4,210.9 143.2,208.7 143.9,210 
+		144.6,213.7 145.3,213 146.1,210.6 146.8,210.5 147.5,213.9 148.2,213.5 148.9,211.1 149.7,212.2 150.4,221 151.1,211 151.8,217.1 
+		152.6,211.1 153.3,218.6 154,216.4 154.7,214.8 155.5,220.2 156.2,213.1 156.9,214.8 157.6,215.3 158.4,220 159.1,217 159.8,217.3 
+		160.5,218.7 161.2,216.6 162,218.6 162.7,220.4 163.4,221.7 164.1,222 164.9,220.8 165.6,220.4 166.3,223 167,219.3 167.8,222.5 
+		168.5,219.3 169.2,221.3 169.9,220.6 170.6,220.9 171.4,224.9 172.1,224.1 172.8,220.6 173.5,222.2 174.3,225.3 175,223.9 
+		175.7,225 176.4,225.4 177.2,221 177.9,224.1 178.6,226.2 179.3,223.2 180.1,226 180.8,222.3 181.5,225.6 182.2,226.9 182.9,227.8 
+		183.7,230.3 184.4,225.9 185.1,226.8 185.8,224.6 186.6,225.1 187.3,226.4 188,230.6 188.7,227.3 189.5,225.1 190.2,231.2 
+		190.9,232.8 191.6,232.7 192.3,229.6 193.1,234.2 193.8,235 194.5,228.1 195.2,231.9 196,230.3 196.7,234.6 197.4,233.7 
+		198.1,227.1 198.9,234.4 199.6,236 200.3,236.3 201,226.5 201.8,237 202.5,236.1 203.2,234.7 203.9,237 204.6,235.4 205.4,237.3 
+		206.1,240.5 206.8,231.2 207.5,240.1 208.3,239.2 209,235.8 209.7,238.1 210.4,240.4 211.2,238.5 211.9,242.3 212.6,242.7 
+		213.3,239.8 214,238.3 214.8,242.3 215.5,241.1 216.2,242.4 216.9,242.7 217.7,241.8 218.4,244.5 219.1,247.7 219.8,243.3 
+		220.6,243.2 221.3,244.8 222,242.7 222.7,245.5 223.5,243.1 224.2,240.8 224.9,245 225.6,247.4 226.3,247.2 227.1,248.6 
+		227.8,243.7 228.5,250.2 229.2,250.6 230,247.5 230.7,251.2 231.4,247.8 232.1,251.2 232.9,251.2 233.6,253.4 234.3,251.2 
+		235,253.5 235.7,247.4 236.5,254.5 237.2,253.5 237.9,252.5 238.6,250.7 239.4,254 240.1,255.4 240.8,252.9 241.5,255.7 
+		242.3,253.2 243,259.8 243.7,257.8 244.4,253.6 245.2,255.1 245.9,256.8 246.6,254.4 247.3,256.7 248,265.3 248.8,255.7 249.5,260 
+		250.2,259.7 250.9,262.7 251.7,262.3 252.4,264.1 253.1,259.8 253.8,265.1 254.6,262.9 255.3,259.8 256,264.5 256.7,266.5 
+		257.5,264.3 258.2,266.8 258.9,263.6 259.6,264.6 260.3,265.3 261.1,263.8 261.8,264.3 262.5,267.5 263.2,268.7 264,274.2 
+		264.7,268.2 265.4,267.4 266.1,266.5 266.9,271.1 267.6,271.1 268.3,271.3 269,275.6 269.7,270.5 270.5,274.9 271.2,276.1 
+		271.9,270 272.6,274.6 273.4,270.7 274.1,270.5 274.8,277.9 275.5,270.5 276.3,280.3 277,278.6 277.7,277.5 278.4,279.1 
+		279.1,281.5 279.9,277.6 280.6,279 281.3,281.1 282,279.2 282.8,280.6 283.5,281.2 284.2,286 284.9,277.5 285.7,281.3 286.4,285.7 
+		287.1,280.8 287.8,283.2 288.6,285.2 289.3,283.8 290,284.8 290.7,285.2 291.4,289.7 292.2,283.9 292.9,287.2 293.6,287.6 
+		294.3,285.7 295.1,287.6 295.8,289.4 296.5,292.1 297.2,285.8 298,289.3 298.7,293.9 299.4,293.4 300.1,286.3 300.8,294.1 
+		301.6,297.9 302.3,297 303,292.6 303.7,299.2 304.5,292.7 305.2,296.9 305.9,298.5 306.6,295 307.4,298.2 308.1,297.9 308.8,297.7 
+		309.5,299.3 310.3,294.6 311,299.6 311.7,302.4 312.4,299.2 313.1,301.4 313.9,302 314.6,303.4 315.3,301.8 316,305.4 316.8,305.7 
+		317.5,306 318.2,304.3 318.9,301.9 319.7,305.1 320.4,305.6 321.1,311.1 321.8,309.4 322.5,304.3 323.3,307 324,308.7 324.7,307.9 
+		325.4,310.9 326.2,307.6 326.9,309.9 327.6,312 328.3,312 329.1,313.5 329.8,314.7 330.5,311.9 331.2,314.9 332,315.2 332.7,313.9 
+		333.4,314.1 334.1,316.3 334.8,317.1 335.6,314.3 336.3,318.8 337,320.5 337.7,319.1 338.5,322.3 339.2,315.1 339.9,319.7 
+		340.6,321.1 341.4,322.9 342.1,318.1 342.8,320.3 343.5,322.7 344.3,325.1 345,317.9 345.7,323.1 346.4,320.4 347.1,326.5 
+		347.9,325.1 348.6,330.2 349.3,323 350,327.2 350.8,329 351.5,325.8 352.2,326.9 352.9,330.6 353.7,326.5 354.4,329.9 355.1,332.8 
+		355.8,331.2 356.5,333.5 357.3,331.9 358,331.8 358.7,330.9 359.4,330.7 360.2,333.3 360.9,338.6 361.6,335.3 362.3,333.3 
+		363.1,334.3 363.8,335.9 364.5,334.3 365.2,333.3 365.9,340.5 366.7,334.4 367.4,339.4 368.1,337 368.8,337.7 369.6,336.4 
+		370.3,346.2 371,337.1 371.7,341.9 372.5,342.4 373.2,338.9 373.9,339.4 374.6,343.9 375.4,345.1 376.1,338.9 376.8,341.9 
+		377.5,345.4 378.2,343.7 379,347 379.7,347.7 380.4,344.5 381.1,347.5 381.9,345.8 382.6,344 383.3,346.9 384,349.8 384.8,352.1 
+		385.5,349.9 386.2,347.3 386.9,346.6 387.6,349.7 388.4,352.3 389.1,350.7 389.8,351 390.5,354.3 391.3,353.3 392,351.4 
+		392.7,348.2 393.4,356.5 394.2,355.8 394.9,355.6 395.6,357.6 396.3,351.9 397.1,358.2 397.8,354.7 398.5,356.1 399.2,354.9 
+		399.9,356.6 400.7,356.2 401.4,357.7 402.1,359.1 402.8,354.5 403.6,354.5 404.3,360.9 405,356.4 405.7,357.2 406.5,360.5 
+		407.2,362.7 407.9,361.1 408.6,364.7 409.4,360.9 410.1,361.5 410.8,362.3 411.5,364.1 412.2,363.5 413,364.2 413.7,365 
+		414.4,363.8 415.1,361.8 415.9,365.7 416.6,365.7 417.3,365.8 418,368.4 418.8,367.1 419.5,367.1 420.2,369.1 420.9,368.9 
+		421.6,367.9 422.4,368.9 423.1,368.7 423.8,369.4 424.5,370.1 425.3,369.9 426,369.4 426.7,369.4 427.4,370.2 428.2,372.7 
+		428.9,370.6 429.6,370.5 430.3,372.9 431,372.7 431.8,373 432.5,373.2 433.2,373.2 433.9,371.8 434.7,372.6 435.4,371.9 
+		436.1,372.1 436.8,371.8 437.6,372.7 438.3,373.7 439,373.7 439.7,373.7 440.5,373.5 441.2,373.8 441.9,373.9 442.6,373.9 
+		443.3,374 444.1,374 444.8,373.9 445.5,374 446.2,374 447,374 447.7,373.9 448.4,374 449.1,374 449.9,374 450.6,373.9 451.3,373.9 
+		452,373.9 452.8,374 453.5,374 454.2,374 454.9,374 455.6,374 456.4,374 457.1,374 457.8,374 458.5,374 459.3,374 460,374 
+		460.7,374 461.4,374 462.2,374 462.9,374 463.6,374 464.3,374 465,374 465.8,374 466.5,374 467.2,374 467.9,374 468.7,374 
+		469.4,374 470.1,374 470.8,374 471.6,374 472.3,374 473,374 473.7,374 474.5,374 475.2,374 475.9,374 476.6,374 477.3,374 
+		478.1,374 478.8,374 479.5,374 480.2,374 481,374 481.7,374 482.4,374 483.1,374 483.9,374 484.6,374 485.3,374 486,374 486.7,374 
+		487.5,374 488.2,374 488.9,374 489.6,374 490.4,374 491.1,374 491.8,374 492.5,374 493.3,374 494,374 494.7,374 495.4,374 
+		496.1,374 	"/>
+	<path class="st8" d="M82.1,31l0.3,73.6L83,31 M83.3,31l0.6,54.7l0.7,8.6L85,31 M86.6,31l0.2,22.3L87,31 M88.6,31l0.3,67.5l0.7,3.4
+		l0.7-30.1l0.7,29.5L91.8,87l0.7,0.4l0.7,25.5l0.7,3.2l0.7-40.8l0.7-3.8l0.7,38.9l0.7-24.2l0.7,0.6L98.3,76l0.4-45 M99.2,31
+		l0.5,79.7l0.6-79.7 M100.7,31l0.5,46.9l0.7-11.8l0.7,72.6l0.7-89.2l0.7,39.1l0.6-57.6 M104.9,31l0.6,63.3l0.7-56.2l0.7,61.7
+		l0.7-1.3l0.7-41.5l0.7,33.3l0.7,41l0.7-65.7l0.7-9l0.7-7l0.7,20.2l0.3-38.7 M113.9,31l0.4,50.3l0.7,49.9l0.7-80l0.6-20.2 M117.4,31
+		l0.4,70.4l0.3-70.4 M119.6,31l0.4,45l0.7-10.1l0.5-34.9 M121.6,31l0.6,53.6l0.7-16.2l0.7,33.9l0.7-36.6l0.7,23.6l0.7,9.5l0.7,23.4
+		l0.7-91.3 M127.4,31l0.6,13l0.2-13 M128.9,31l0.6,103.5l0.7-76.6l0.7,92.2L131.6,35l0.7,33.2l0.7,40l0.7,12l0.7-13.1l0.7-60.2
+		l0.5-15.9 M136,31l0.7,78l0.7-70.9l0.7,111.3L138.7,31 M139.1,31l0.5,30.9l0.1-30.9 M141.4,31l0.3,34.3l0.7,52.4l0.7-30.5l0.7-48
+		l0.7,41.4l0.7,35.1l0.7,41l0.7-35.4l0.7,13.7l0.7-72.6l0.7,9.7l0.7-20.6l0.7,74.1l0.5-94.6 M151.3,31l0.5,106.2l0.7-17.9l0.7,16.2
+		l0.7-85.6l0.7,76.6l0.7-41.2l0.7,24.6l0.7-49.2l0.7,12.4l0.3-42.2 M158.7,31l0.4,56.8l0.7,37.5l0.7-7.2l0.7-87.1 M161.3,31
+		l0.7,80.8l0.7,18.3l0.7,36.4l0.7-86.3l0.7-29.9l0.7,119.1l0.7-101.8l0.7,81l0.7-19.1l0.7-77.9l0.4-20.6 M169.3,31l0.6,104.3l0.7-76
+		l0.7,41.9l0.7-11.8l0.7,50.9l0.7-69.2l0.7-40.1 M174.3,31l0.7,121l0.7-120.2l0.7,73l0.7-70.3l0.7,48.8l0.7,26.1l0.7-13.9l0.7,4.4
+		l0.7-15.8l0.7,50.7l0.7-26.5l0.7,26.5l0.7,6.3l0.7-7.4l0.7-24l0.7,14.5l0.7-57.2l0.7,36.2L188,31 M188,31l0.7,87.3l0.7-11.4
+		l0.7-37.2l0.7,23.1l0.7,31.6l0.7-84.2l0.7,74.1l0.7-33.7l0.7,50.5l0.7,39.3l0.7-78.9l0.7,35.4l0.7-15.2l0.7,34.9l0.7-24.8l0.7-9.3
+		L200,31 M200.7,31l0.3,53.8l0.7,52.4l0.7,3.2l0.7-54.9l0.7,40.6l0.7-32.2l0.7,42.9l0.7-53.9l0.7,79.3l0.7-1l0.7-25.5L209,84l0.3-53
+		 M210,31l0.4,102.2l0.7-12.2l0.7,46.1l0.7-45.4l0.7-69.4l0.7,93.4L214.7,31 M214.9,31l0.6,37.6l0.7,45.9l0.7-21l0.7,23.6l0.7,11.1
+		l0.7,21l0.7-62.3l0.7,56l0.7-29.7l0.7,49.5l0.7,9.9l0.7-62.5l0.7,35.4l0.7-28.2l0.7,18.1l0.7,36L227.1,36l0.7,114.9l0.7-38.7
+		l0.7-1.5l0.7,48.6l0.7-68.4l0.7-59.9 M231.4,31l0.7,124.3l0.7,0.2l0.7-20.8l0.7-27.2l0.5-76.5 M235.2,31l0.6,127l0.7,5.5l0.7-8.4
+		l0.7-22.5l0.7,6.5l0.7,9.7l0.7,9.1l0.7,4l0.7-12.4l0.7-27.3l0.7,43.4l0.7,8.6l0.7-37.3l0.7-63.5l0.7,52l0.7-4.8l0.7-46.3l0.7,16
+		l0.7,36.4l0.7,45.2l0.7-3.4l0.7-22.9l0.7-13.5l0.7,14.5l0.7-12l0.7-37l0.7,18.5l0.7-12.8L256,168l0.7,10.9l0.7-29.5l0.7-50.9
+		l0.7,45l0.7,17.3l0.7-4.4l0.7-54.7l0.7,52.2l0.7,24.6l0.7-39.1l0.7-94.1l0.7,15.1l0.7,67.3l0.7,44l0.7-53.7l0.7,52.2l0.7-46.7
+		l0.7-2.7l0.7,7.8l0.7,51.5l0.7-2.1l0.7-11.2l0.7-21.3l0.7,22.9l0.7-39.8l0.7,28.2l0.7-53.4l0.7,64.8l0.7-7.6l0.7,16.4l0.7-18.3
+		l0.7,2.9l0.7-19.4l0.7,48l0.7-21l0.7-0.6l0.7,18.5l0.7-18.7l0.7-10.9l0.7,39.8l0.7-2.5l0.7-12.8l0.7-31.4l0.7,13l0.7,37.2l0.7-12.2
+		l0.7-61.6l0.7,59.3l0.7-141.6l0.7,145.6l0.7-19.4l0.7,28.6l0.7-17.2l0.7,10.9l0.7-31.3l0.7,19.4l0.7-65.6l0.7,65.6l0.7,11.2
+		l0.7-74.9l0.7,52l0.7,25.5l0.7-14.9l0.7,8.4l0.7-69.9l0.7,49.4l0.7,24l0.7-29.7l0.7,25.9l0.7-81.7l0.7,88.8l0.7-57l0.7,35.8l0.7-3
+		l0.7-59.8l0.7,92.6l0.7-32l0.7-13.3l0.7,8.6l0.7-6.1l0.7,2.9l0.7-3.2l0.7,44.6l0.7-14.5l0.7-2.9l0.7-3.8l0.7-19.2l0.7-71.3
+		l0.7,57.7l0.7-0.6l0.7,15.4l0.7,27.4l0.7-7.2l0.7-11.6l0.7,43.8l0.7-18.9l0.7-36.2l0.7,21l0.7,0.4l0.7-5.7l0.7-10.3l0.7,60.8
+		l0.7-30.1l0.7-41l0.7,59.1l0.7-61.9l0.7,44.6l0.7-9.7l0.7-11.1l0.7-0.6l0.7,1.1l0.7,16.6l0.7,20.2l0.7-24.4l0.7-1.3l0.7-12
+		l0.7,34.7l0.7-83.3l0.7,77l0.7-28.8l0.7,50.5l0.7-56l0.7,49.7l0.7-26.9l0.7,4.8l0.7,16l0.7,5.7l0.7,6.1l0.7-4.4l0.7-9.1l0.7,1
+		l0.7-32.8l0.7,45.9l0.7-131.7l0.7,105.6l0.7,16.8l0.7-27.8l0.7,34.3l0.7-25.9l0.7-5.9l0.7,16l0.7,1.1l0.7-31.4l0.7,48l0.7-8.6
+		l0.7-2.1l0.7,11.8l0.7-8.2l0.7-2.7l0.7,5.9l0.7-19.8l0.7,7.2l0.7,21.2l0.7-39.3l0.7,16.6l0.7,6.9l0.7,13.5l0.7,3l0.7-22.1l0.7,18.9
+		l0.7,9l0.7-9.5l0.7-16.8l0.7,11.6l0.7-3l0.7,1.3l0.7,24.4l0.7-9.9l0.7-36.8l0.7,23.4l0.7-5l0.7,8.4l0.7-1.1l0.7-1l0.7,13l0.7,0.6
+		l0.7-0.4l0.7,6.5l0.7-46.7l0.7,42.9l0.7,2.1l0.7-21.9l0.7,21.3l0.7-25.7l0.7,5.7l0.7-20.6l0.7,22.5l0.7,12.6l0.7,7.1l0.7,5.7l0.7-1
+		l0.7-0.2l0.7-25.5l0.7,32.4l0.7-19.6l0.7,10.5l0.7-10.3l0.7-20.6l0.7,38.3l0.7-17.1l0.7-12l0.7-6.3l0.7,14.9l0.7,16l0.7-9.9
+		l0.7-0.2l0.7,17.2l0.7,5.7l0.7-0.4l0.7-27.1l0.7,22.3l0.7-11.2l0.7,24l0.7-10.3l0.7-5l0.7-7.6l0.7,18.3l0.7-8l0.7-1.9l0.7-0.8
+		l0.7,5.9l0.7,5.5l0.7-2.3l0.7,3.4l0.7-18.3l0.7,19.4l0.7,3l0.7,0.2l0.7-6.9l0.7,8.6l0.7-9.9l0.7,12.8l0.7,5l0.7-0.2l0.7-10.5
+		l0.7,4.2l0.7,7.8l0.7-0.4l0.7-0.6l0.7-5l0.7,6.3l0.7,1l0.7,1l0.7-2.1l0.7-1.1l0.7,3.4l0.7-4.8l0.7,2.1l0.7,2.1l0.7-0.2l0.7,9.1
+		l0.7,1.9l0.7-0.8l0.7,5.5l0.7-3l0.7,1.3l0.7,2.1l0.7-0.8l0.7-1.9l0.7,0.2l0.7,5.9l0.7-0.6l0.7,0.8l0.7-1.9l0.7-3.8l0.7,6.5h0.7
+		l0.7-1.9l0.7,1.5l0.7,1l0.7,0.2l0.7-1.5l0.7,1.3l0.7,0.2l0.7-1.1l0.7-1.9l0.7,3.4l0.7-0.6l0.7,0.2l0.7,1.3l0.7-0.8l0.7-0.2l0.7-1.7
+		l0.7,2.5l0.7,1l0.7-1.3l0.7,1.1l0.7,0.2l0.7-1.3l0.7,1.7l0.7-4l0.7,2.7l0.7,2.3l0.7-3.4l0.7,1.1l0.7-2.1l0.7,0.6l0.7-0.4l0.7,1.5
+		l0.7-2.1l0.7,1.3l0.7,1.1l0.7,0.6l0.7-0.2l0.7,0.2l0.7-1.7l0.7,1l0.7,0.2l0.7-0.2l0.7,1l0.7-0.4l0.7,1.5l0.7-1.5l0.7,0.4l0.7-1.1
+		l0.7-0.8l0.7,0.2l0.7,1l0.7-1.1h0.7l0.7,2.9l0.7-0.8l0.7-0.6l0.7,1.7l0.7-1.3l0.7-0.8l0.7-0.2l0.7-0.2l0.7,0.8l0.7-0.6l0.7,0.8
+		l0.7,2.9"/>
+</g>
+</svg>
--- a/docs/source/dilated.rst
+++ b/docs/source/dilated.rst
@@ -9,8 +9,17 @@ For dilation of DenseNet, we provide :class:`encoding.nn.DilatedAvgPool2d`.
 All provided models have been verified. 

 .. note::
+    This code is provided together with the paper

-    This code is provided together with the paper (coming soon), please cite our work.
+    * Hang Zhang, Kristin Dana, Jianping Shi, Zhongyue Zhang, Xiaogang Wang, Ambrish Tyagi, Amit Agrawal. "Context Encoding for Semantic Segmentation"  *The IEEE Conference on Computer Vision and Pattern Recognition (CVPR) 2018*::
+
+        @InProceedings{Zhang_2018_CVPR,
+        author = {Zhang, Hang and Dana, Kristin and Shi, Jianping and Zhang, Zhongyue and Wang, Xiaogang and Tyagi, Ambrish and Agrawal, Amit},
+        title = {Context Encoding for Semantic Segmentation},
+        booktitle = {The IEEE Conference on Computer Vision and Pattern Recognition (CVPR)},
+        month = {June},
+        year = {2018}
+        }

 .. automodule:: encoding.dilated
 .. currentmodule:: encoding.dilated

--- a/docs/source/encoding.rst
+++ b/docs/source/encoding.rst
@@ -44,8 +44,7 @@ Functions

 .. autofunction:: aggregate

-:hidden:`scaledL2`
-~~~~~~~~~~~~~~~~~~~
-
-.. autofunction:: scaledL2
+:hidden:`dilatedavgpool2d`
+~~~~~~~~~~~~~~~~~~~~~~~~~~

+.. autofunction:: dilatedavgpool2d
--- a/docs/source/experiments/cifar.rst
+++ b/docs/source/experiments/cifar.rst
+EncNet on CIFAR-10
+==================
+
+
+Test Pre-trained Model
+----------------------
+
+- Clone the GitHub repo::
+
+    git clone git@github.com:zhanghang1989/PyTorch-Encoding.git
+
+- Install PyTorch Encoding (if not yet installed). Please follow the installation guide `Installing PyTorch Encoding <../notes/compile.html>`_.
+
+- Download pre-trained EncNet-32k128d model::
+
+    cd PyTorch-Encoding/experiments/recognition
+    bash model/download_cifar_models.sh
+
+.. _curve:
+
+.. image:: ../_static/img/EncNet32k128d.svg
+    :width: 70%
+
+- Test EncNet-32k128d pre-trained model (training `curve`_ of this model is shown above, with a final error rate of :math:`3.35\%`)::
+
+    >>> python main.py --dataset cifar10 --model encnetdrop --widen 8 --ncodes 32 --resume model/encnet_cifar.pth.tar --eval
+    # Terminal Output:
+    #Loss: 0.129 | Err: 3.350% (335/10000): 100%|█████████████████████████████████████████████| 79/79 [00:49<00:00,  1.58it/s]
+    # Error rate is 3.350 
+
+Train Your Own Model
+--------------------
+
+- Example command for training the above model::
+
+    CUDA_VISIBLE_DEVICES=0,1 python main.py --dataset cifar10 --model encnetdrop --widen 8 --ncodes 32 --lr-scheduler cos --epochs 600 --checkname mycheckpoint
+
+- Detailed training options::
+
+  -h, --help            show this help message and exit
+  --dataset DATASET     training dataset (default: cifar10)
+  --model MODEL         network model type (default: densenet)
+  --widen N             widen factor of the network (default: 4)
+  --ncodes N            number of codewords in Encoding Layer (default: 32)
+  --batch-size N        batch size for training (default: 128)
+  --test-batch-size N   batch size for testing (default: 1000)
+  --epochs N            number of epochs to train (default: 300)
+  --start_epoch N       the epoch number to start (default: 0)
+  --lr LR               learning rate (default: 0.1)
+  --momentum M          SGD momentum (default: 0.9)
+  --weight-decay M      SGD weight decay (default: 1e-4)
+  --no-cuda             disables CUDA training
+  --plot                matplotlib
+  --seed S              random seed (default: 1)
+  --resume RESUME       put the path to resuming file if needed
+  --checkname           set the checkpoint name
+  --eval                evaluating
+
+
+Extending the Software
+----------------------
+
+This code is well written, easy to use and extendable for your own models or datasets:
+
+- Add your own dataloader ``mydataset.py`` to the ``dataset/`` folder
+
+- Add your own model ``mymodel.py`` to the ``model/`` folder
+
+- Run the program::
+
+    python main.py --dataset mydataset --model mymodel
+
+Citation
+--------
+
+.. note::
+    * Hang Zhang, Kristin Dana, Jianping Shi, Zhongyue Zhang, Xiaogang Wang, Ambrish Tyagi, Amit Agrawal. "Context Encoding for Semantic Segmentation"  *The IEEE Conference on Computer Vision and Pattern Recognition (CVPR) 2018*::
+
+        @InProceedings{Zhang_2018_CVPR,
+        author = {Zhang, Hang and Dana, Kristin and Shi, Jianping and Zhang, Zhongyue and Wang, Xiaogang and Tyagi, Ambrish and Agrawal, Amit},
+        title = {Context Encoding for Semantic Segmentation},
+        booktitle = {The IEEE Conference on Computer Vision and Pattern Recognition (CVPR)},
+        month = {June},
+        year = {2018}
+        }
--- a/docs/source/functions.rst
+++ b/docs/source/functions.rst
@@ -9,10 +9,11 @@ Other Functions
 .. currentmodule:: encoding.functions


-:hidden:`dilatedavgpool2d`
-~~~~~~~~~~~~~~~~~~~~~~~~~~
+:hidden:`scaledL2`
+~~~~~~~~~~~~~~~~~~~
+
+.. autofunction:: scaledL2

-.. autofunction:: dilatedavgpool2d

 :hidden:`upsample`
 ~~~~~~~~~~~~~~~~~~~~~~~~~~

--- a/docs/source/index.rst
+++ b/docs/source/index.rst
@@ -7,10 +7,10 @@ Encoding Documentation

 Created by `Hang Zhang <http://hangzh.com/>`_

- An optimized PyTorch package with CUDA backend, including Encoding Layer :class:`encoding.nn.Encoding`, Multi-GPU Synchronized Batch Normalization :class:`encoding.nn.BatchNorm2d` and other customized modules and functions. 
-
- **Example Systems** for Semantic Segmentation (coming), CIFAR-10 Classification, `Texture Recognition <experiments/texture.html>`_ and `Style Transfer <experiments/style.html>`_ are provided in experiments section. 
+An optimized PyTorch package with CUDA backend. 

+.. todo::
+    A PyTorch DataParallel compatible Synchronized Cross-GPU Batch Normalization will be provided soon.

 .. toctree::
   :glob:
@@ -19,6 +19,13 @@ Created by `Hang Zhang <http://hangzh.com/>`_

   notes/*

+.. toctree::
+   :glob:
+   :maxdepth: 1
+   :caption: Experiment Systems
+
+   experiments/*
+
 .. toctree::
   :maxdepth: 1
   :caption: Package Reference
@@ -31,13 +38,6 @@ Created by `Hang Zhang <http://hangzh.com/>`_
   functions
   utils

-.. toctree::
-   :glob:
-   :maxdepth: 1
-   :caption: Experiment Systems
-
-   experiments/*
-
 Indices and tables
 ==================


--- a/docs/source/notes/compile.rst
+++ b/docs/source/notes/compile.rst
-Installing PyTorch-Encoding
-===========================
+Install and Citations
+=====================


 Install from Source
@@ -21,7 +21,7 @@ Install from Source

             MACOSX_DEPLOYMENT_TARGET=10.9 CC=clang CXX=clang++ python setup.py install

-Reference
+Citations
 ---------

    .. note::

--- a/encoding/functions/aggregate.py
+++ b/encoding/functions/aggregate.py
-##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
-## Created by: Hang Zhang
-## ECE Department, Rutgers University
-## Email: zhang.hang@rutgers.edu
-## Copyright (c) 2017
-##
-## This source code is licensed under the MIT-style license found in the
-## LICENSE file in the root directory of this source tree 
-##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
-
-import threading
-import torch
-import torch.nn as nn
-import torch.nn.functional as F
-from torch.autograd import Function, Variable
-from .._ext import encoding_lib
-
-
-class aggregate(Function):
-    r"""
-    Aggregate operation, aggregate the residuals of inputs (:math:`X`) with repect to the codewords (:math:`C`) with assignment weights (:math:`A`).
-    
-
-    .. math::
-        e_{k} = \sum_{i=1}^{N} a_{ik} (x_i - d_k)
-
-    Shape:
-        - Input: :math:`A\in\mathcal{R}^{B\times N\times K}` :math:`X\in\mathcal{R}^{B\times N\times D}` :math:`C\in\mathcal{R}^{K\times D}`  (where :math:`B` is batch, :math:`N` is total number of features, :math:`K` is number is codewords, :math:`D` is feature dimensions.)
-        - Output: :math:`E\in\mathcal{R}^{B\times K\times D}`
-
-    Examples:
-        >>> B,N,K,D = 2,3,4,5
-        >>> A = Variable(torch.cuda.DoubleTensor(B,N,K).uniform_(-0.5,0.5), requires_grad=True)
-        >>> X = Variable(torch.cuda.DoubleTensor(B,N,D).uniform_(-0.5,0.5), requires_grad=True)
-        >>> C = Variable(torch.cuda.DoubleTensor(K,D).uniform_(-0.5,0.5), requires_grad=True)
-        >>> func = encoding.aggregate()
-        >>> E = func(A, X, C)
-
-    """
-    def forward(self, A, X, C):
-        # A \in(BxNxK) R \in(BxNxKxD) => E \in(BxNxD)
-        self.save_for_backward(A, X, C)
-        B, N, K = A.size()
-        D = X.size(2)
-        with torch.cuda.device_of(A):
-            E = A.new(B,K,D)
-        if isinstance(A, torch.cuda.FloatTensor):
-            with torch.cuda.device_of(A):
-                encoding_lib.Encoding_Float_aggregateE_forward(E, A, X, C)
-        elif isinstance(A, torch.cuda.DoubleTensor):
-            with torch.cuda.device_of(A):
-                encoding_lib.Encoding_Double_aggregateE_forward(E, A, X, C)
-        else:
-            raise RuntimeError('Unimplemented data type!')
-        return E
-
-    def backward(self, gradE):
-        A, X, C = self.saved_tensors
-        with torch.cuda.device_of(A):
-            gradA = A.new().resize_as_(A)
-            gradX = A.new().resize_as_(X)
-            gradC = A.new().resize_as_(C)
-        if isinstance(A, torch.cuda.FloatTensor):
-            with torch.cuda.device_of(A):
-                encoding_lib.Encoding_Float_aggregateE_backward(gradA, 
-                    gradE, A, X, C)
-        elif isinstance(A, torch.cuda.DoubleTensor):
-            with torch.cuda.device_of(A):
-                encoding_lib.Encoding_Double_aggregateE_backward(gradA, 
-                    gradE, A, X, C)
-        else:
-            raise RuntimeError('Unimplemented data type!')
-        gradX.copy_(torch.bmm(A, gradE))
-        gradC.copy_((-gradE*A.sum(1).unsqueeze(2)).sum(0))
-        return gradA, gradX, gradC
-
-
-class scaledL2(Function):
-    r"""
-    scaledL2 distance
-
-    .. math::
-        sl_{ik} = s_k \|x_i-c_k\|^2
-
-    Shape:
-        - Input: :math:`X\in\mathcal{R}^{B\times N\times D}` :math:`C\in\mathcal{R}^{K\times D}` :math:`S\in \mathcal{R}^K` (where :math:`B` is batch, :math:`N` is total number of features, :math:`K` is number is codewords, :math:`D` is feature dimensions.)
-        - Output: :math:`E\in\mathcal{R}^{B\times N\times K}`
-
-    """
-    def forward(self, X, C, S):
-        B,N,D = X.size()
-        K = C.size(0)
-        with torch.cuda.device_of(X):
-            SL = X.new(B,N,K)
-        if isinstance(X, torch.cuda.FloatTensor):
-            with torch.cuda.device_of(X):
-                encoding_lib.Encoding_Float_scaledl2_forward(SL, X, C, S)
-        elif isinstance(X, torch.cuda.DoubleTensor):
-            with torch.cuda.device_of(X):
-                encoding_lib.Encoding_Double_scaledl2_forward(SL, X, C, S)
-        else:
-            raise RuntimeError('Unimplemented data type!')
-        self.save_for_backward(X, C, S, SL)
-        return SL
-    def backward(self, gradSL):
-        X, C, S, SL = self.saved_tensors
-        K = C.size(0)
-        with torch.cuda.device_of(X):
-            gradX = X.new().resize_as_(X)
-            gradC = X.new().resize_as_(C)
-            gradS = X.new().resize_as_(S)
-        if isinstance(X, torch.cuda.FloatTensor):
-            with torch.cuda.device_of(X):
-                encoding_lib.Encoding_Float_scaledl2_backward(gradSL, 
-                    gradX, gradC, X, C, S)
-        elif isinstance(X, torch.cuda.DoubleTensor):
-            with torch.cuda.device_of(X):
-                encoding_lib.Encoding_Double_scaledl2_backward(gradSL, 
-                    gradX, gradC, X, C, S)
-        else:
-            raise RuntimeError('Unimplemented data type!')
-        gradS.copy_((gradSL*(SL/S.view(1,1,K))).sum(0).sum(0))
-        return gradX, gradC, gradS
-
-
-class aggregateP(Function):
-    def forward(self, A, R):
-        # A \in(BxNxK) R \in(BxNxKxD) => E \in(BxNxD)
-        self.save_for_backward(A, R)
-        B, N, K, D = R.size()
-        with torch.cuda.device_of(A):
-            E = A.new(B,K,D)
-        if isinstance(A, torch.cuda.FloatTensor):
-            with torch.cuda.device_of(A):
-                encoding_lib.Encoding_Float_aggregate_forward(E, A, R)
-        elif isinstance(A, torch.cuda.DoubleTensor):
-            with torch.cuda.device_of(A):
-                encoding_lib.Encoding_Double_aggregate_forward(E, A, R)
-        else:
-            raise RuntimeError('Unimplemented data type!')
-        return E
-
-    def backward(self, gradE):
-        A, R = self.saved_tensors
-        with torch.cuda.device_of(A):
-            gradA = A.new().resize_as_(A)
-            gradR = R.new().resize_as_(R)
-        if isinstance(A, torch.cuda.FloatTensor):
-            with torch.cuda.device_of(A):
-                encoding_lib.Encoding_Float_aggregate_backward(gradA, 
-                    gradR, gradE, A, R)
-        elif isinstance(A, torch.cuda.DoubleTensor):
-            with torch.cuda.device_of(A):
-                encoding_lib.Encoding_Double_aggregate_backward(gradA, 
-                    gradR, gradE, A, R)
-        else:
-            raise RuntimeError('Unimplemented data type!')
-        return gradA, gradR
-
-
-class residual(Function):
-    r"""
-    Calculate residuals over a mini-batch
-    
-    .. math::
-        r_{ik} = x_i - c_k
-
-    Shape:
-        - Input: :math:`X\in\mathcal{R}^{B\times N\times D}` :math:`C\in\mathcal{R}^{K\times D}` (where :math:`B` is batch, :math:`N` is total number of features, :math:`K` is number is codewords, :math:`D` is feature dimensions.)
-        - Output: :math:`R\in\mathcal{R}^{B\times N\times K\times D}`
-
-    """
-    def forward(self, X, C):
-        # X \in(BxNxD) D \in(KxD) R \in(BxNxKxD) 
-        B, N, D = X.size()
-        K = C.size(0)
-        with torch.cuda.device_of(X):
-            R = X.new(B,N,K,D)
-        if isinstance(X, torch.cuda.FloatTensor):
-            with torch.cuda.device_of(X):
-                encoding_lib.Encoding_Float_residual_forward(R, X, C)
-        elif isinstance(X, torch.cuda.DoubleTensor):
-            with torch.cuda.device_of(X):
-                encoding_lib.Encoding_Double_residual_forward(R, X, C)
-        else:
-            raise RuntimeError('Unimplemented data type!')
-        return R
-
-    def backward(self, gradR):
-        B, N, K, D = gradR.size()
-        with torch.cuda.device_of(gradR):
-            gradX = gradR.new(B,N,D)
-            gradD = gradR.new(K,D)
-        if isinstance(gradR, torch.cuda.FloatTensor):
-            with torch.cuda.device_of(gradR):
-                encoding_lib.Encoding_Float_residual_backward(gradR, 
-                    gradX, gradD)
-        elif isinstance(gradR, torch.cuda.DoubleTensor):
-            with torch.cuda.device_of(gradR):
-                encoding_lib.Encoding_Double_residual_backward(gradR, 
-                    gradX, gradD)
-        else:
-            raise RuntimeError('Unimplemented data type!')
-        return gradX, gradD
-
-
-class square_squeeze(Function):
-    def forward(self, R):
-        B, N, K, D = R.size()
-        with torch.cuda.device_of(R):
-            L = R.new(B,N,K)
-        if isinstance(R, torch.cuda.FloatTensor):
-            with torch.cuda.device_of(R):
-                encoding_lib.Encoding_Float_squaresqueeze_forward(L, R)
-        elif isinstance(R, torch.cuda.DoubleTensor):
-            with torch.cuda.device_of(R):
-                encoding_lib.Encoding_Double_squaresqueeze_forward(L, R)
-        else:
-            raise RuntimeError('Unimplemented data type!')
-        self.save_for_backward(L, R)
-        return L
-
-    def backward(self, gradL):
-        L, R = self.saved_tensors
-        B, N, K, D = R.size()
-        with torch.cuda.device_of(R):
-            gradR = R.new(B,N,K,D)
-        if isinstance(R, torch.cuda.FloatTensor):
-            with torch.cuda.device_of(gradL):
-                encoding_lib.Encoding_Float_squaresqueeze_backward(gradL, 
-                    gradR, R)
-        elif isinstance(R, torch.cuda.DoubleTensor):
-            with torch.cuda.device_of(gradL):
-                encoding_lib.Encoding_Double_squaresqueeze_backward(gradL, 
-                    gradR, R)
-        else:
-            raise RuntimeError('Unimplemented data type!')
-        return gradR
-    
-
-def assign(R, S):
-    r"""
-    Calculate assignment weights for given residuals (:math:`R`) and scale (:math:`S`)
-
-    .. math::
-        a_{ik} = \frac{exp(-s_k\|r_{ik}\|^2)}{\sum_{j=1}^K exp(-s_j\|r_{ik}\|^2)}
-
-    Shape:
-        - Input: :math:`R\in\mathcal{R}^{B\times N\times K\times D}` :math:`S\in \mathcal{R}^K` (where :math:`B` is batch, :math:`N` is total number of features, :math:`K` is number is codewords, :math:`D` is feature dimensions.)
-        - Output :math:`A\in\mathcal{R}^{B\times N\times K}`
-
-    """
-    L = square_squeeze()(R)
-    K = S.size(0)
-    SL = L * S.view(1,1,K)
-    return F.softmax(SL)
--- a/encoding/functions/customize.py
+++ b/encoding/functions/customize.py
@@ -78,6 +78,10 @@ class _dilatedavgpool2d(Function):
 def dilatedavgpool2d(input, kernel_size, stride=None, padding=0, 
        dilation=1):
    """Dilated Average Pool 2d, for dilation of DenseNet. 
+ 
+    Reference:
+
+        Hang Zhang, Kristin Dana, Jianping Shi, Zhongyue Zhang, Xiaogang Wang, Ambrish Tyagi, Amit Agrawal. “Context Encoding for Semantic Segmentation.” CVPR 2018

    Applies 2D average-pooling operation in kh x kw regions by step size
    dh x dw steps. The number of output features is equal to the number of

--- a/encoding/modules/__init__.py
+++ b/encoding/modules/__init__.py
-from .encoding import *
--- a/encoding/modules/encoding.py
+++ b/encoding/modules/encoding.py
-##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
-## Created by: Hang Zhang
-## ECE Department, Rutgers University
-## Email: zhang.hang@rutgers.edu
-## Copyright (c) 2017
-##
-## This source code is licensed under the MIT-style license found in the
-## LICENSE file in the root directory of this source tree 
-##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
-
-import threading
-import torch
-import torch.nn as nn
-import torch.nn.functional as F
-from torch.autograd import Function, Variable
-from .._ext import encoding_lib
-from ..functions import *
-
-
-class Encoding(nn.Module):
-    r"""
-    Encoding Layer: learnable residual encoders over 3d or 4d input that 
-    is seen as a mini-batch.
-
-    .. math::
-
-        e_{ik} = \frac{exp(-s_k\|x_{i}-c_k\|^2)}{\sum_{j=1}^K exp(-s_j\|x_{i}-c_j\|^2)} (x_i - c_k)
-
-    Args:
-        D: dimention of the features or feature channels
-        K: number of codeswords
-
-    Shape:
-        - Input: :math:`X\in\mathcal{R}^{B\times N\times D}` or :math:`\mathcal{R}^{B\times D\times H\times W}` (where :math:`B` is batch, :math:`N` is total number of features or :math:`H\times W`.)
-        - Output: :math:`E\in\mathcal{R}^{B\times K\times D}`
-        
-    Attributes:
-        codewords (Tensor): the learnable codewords of shape (:math:`K\times D`)
-        scale (Tensor): the learnable scale factor of visual centers
-
-    Examples:
-        >>> import encoding
-        >>> import torch
-        >>> import torch.nn.functional as F
-        >>> from torch.autograd import Variable, gradcheck
-        >>> B,C,H,W,K = 2,3,4,5,6
-        >>> X = Variable(torch.cuda.DoubleTensor(B,C,H,W).uniform_(-0.5,0.5), requires_grad=True)
-        >>> layer = encoding.Encoding(C,K).double().cuda()
-        >>> E = layer(X)
-
-    Reference:
-        Hang Zhang, Jia Xue, and Kristin Dana. "Deep TEN: Texture Encoding Network." *The IEEE Conference on Computer Vision and Pattern Recognition (CVPR) 2017*
-    """
-    def __init__(self, D, K):
-        super(Encoding, self).__init__()
-        # init codewords and smoothing factor
-        self.D, self.K = D, K
-        self.codewords = nn.Parameter(torch.Tensor(K, D), 
-            requires_grad=True)
-        self.scale = nn.Parameter(torch.Tensor(K), requires_grad=True) 
-        self.reset_params()
-        
-    def reset_params(self):
-        std1 = 1./((self.K*self.D)**(1/2))
-        std2 = 1./((self.K)**(1/2))
-        self.codewords.data.uniform_(-std1, std1)
-        self.scale.data.uniform_(-std2, std2)
-
-    def forward(self, X):
-        # input X is a 4D tensor
-        assert(X.size(1)==self.D,"Encoding Layer wrong channels!")
-        if X.dim() == 3:
-            # BxDxN
-            B, N, K, D = X.size(0), X.size(2), self.K, self.D
-            X = X.transpose(1,2).contiguous()
-        elif X.dim() == 4:
-            # BxDxHxW
-            B, N, K, D = X.size(0), X.size(2)*X.size(3), self.K, self.D
-            X = X.view(B,D,-1).transpose(1,2).contiguous()
-        else:
-            raise RuntimeError('Encoding Layer unknown input dims!')
-        # assignment weights
-        A = F.softmax(scaledL2()(X, self.codewords, self.scale))
-        # aggregate
-        E = aggregate()(A, X, self.codewords)
-        return E
-
-    def __repr__(self):
-        return self.__class__.__name__ + '(' \
-            + 'N x ' + str(self.D) + '=>' + str(self.K) + 'x' \
-            + str(self.D) + ')'
-
-
-class Aggregate(nn.Module):
-    r"""
-    Aggregate operation, aggregate the residuals (:math:`R`) with 
-    assignment weights (:math:`A`).
-
-    .. math::
-        e_{k} = \sum_{i=1}^{N} a_{ik} r_{ik}
-
-    Shape:
-        - Input: :math:`A\in\mathcal{R}^{B\times N\times K}` :math:`R\in\mathcal{R}^{B\times N\times K\times D}` (where :math:`B` is batch, :math:`N` is total number of features, :math:`K` is number is codewords, :math:`D` is feature dimensions.)
-        - Output: :math:`E\in\mathcal{R}^{B\times K\times D}`
-
-    """ 
-    def forward(self, A, R):
-        return aggregateP()(A, R)
-
-
-class EncodingP(nn.Module):
-    def __init__(self, D, K):
-        super(EncodingP, self).__init__()
-        # init codewords and smoothing factor
-        self.D, self.K = D, K
-        self.codewords = nn.Parameter(torch.Tensor(K, D), 
-            requires_grad=True)
-        self.scale = nn.Parameter(torch.Tensor(K), requires_grad=True) 
-        self.reset_params()
-        print('EncodingP is deprecated, please use Encoding.')
-        
-    def reset_params(self):
-        std1 = 1./((self.K*self.D)**(1/2))
-        std2 = 1./((self.K)**(1/2))
-        self.codewords.data.uniform_(-std1, std1)
-        self.scale.data.uniform_(-std2, std2)
-
-    def forward(self, X):
-        # input X is a 4D tensor
-        assert(X.size(1)==self.D,"Encoding Layer wrong channels!")
-        if X.dim() == 3:
-            # BxDxN
-            B, N, K, D = X.size(0), X.size(2), self.K, self.D
-            X = X.transpose(1,2)
-        elif X.dim() == 4:
-            # BxDxHxW
-            B, N, K, D = X.size(0), X.size(2)*X.size(3), self.K, self.D
-            X = X.view(B,D,-1).transpose(1,2)
-        else:
-            raise RuntimeError('Encoding Layer unknown input dims!')
-        # calculate residuals
-        R = residual()(X.contiguous(), self.codewords)
-        # assignment weights
-        A = assign(R, self.scale)
-        # aggregate
-        E = aggregateP()(A, R)
-
-        return E
-
-    def __repr__(self):
-        return self.__class__.__name__ + '(' \
-            + 'N x ' + str(self.D) + '=>' + str(self.K) + 'x' \
-            + str(self.D) + ')'
-
-
--- a/encoding/nn/encoding.py
+++ b/encoding/nn/encoding.py
@@ -202,7 +202,8 @@ class DilatedAvgPool2d(Module):
    in :class:`encoding.dilated.DenseNet`.

    Reference:
-        We provide this code for a comming paper.
+
+        Hang Zhang, Kristin Dana, Jianping Shi, Zhongyue Zhang, Xiaogang Wang, Ambrish Tyagi, Amit Agrawal. “Context Encoding for Semantic Segmentation.” CVPR 2018

    Applies a 2D average pooling over an input signal composed of several input planes.


--- a/encoding/nn/syncbn.py
+++ b/encoding/nn/syncbn.py
@@ -30,8 +30,9 @@ class BatchNorm1d(Module):

    `Implementation ideas <./notes/syncbn.html>`_. Please use compatible :class:`encoding.parallel.SelfDataParallel` and :class:`encoding.nn`

-    Reference::
-        We provide this code for a comming paper.
+    Reference:
+
+        Hang Zhang, Kristin Dana, Jianping Shi, Zhongyue Zhang, Xiaogang Wang, Ambrish Tyagi, Amit Agrawal. “Context Encoding for Semantic Segmentation.” CVPR 2018

    Applies Batch Normalization over a 2d or 3d input that is seen as a
    mini-batch.
@@ -225,8 +226,9 @@ class BatchNorm2d(Module):

    `Implementation ideas <./notes/syncbn.html>`_. Please use compatible :class:`encoding.parallel.SelfDataParallel` and :class:`encoding.nn`. 

-    Reference::
-        We provide this code for a comming paper.
+    Reference:
+
+        Hang Zhang, Kristin Dana, Jianping Shi, Zhongyue Zhang, Xiaogang Wang, Ambrish Tyagi, Amit Agrawal. “Context Encoding for Semantic Segmentation.” CVPR 2018

    Applies Batch Normalization over a 4d input that is seen as a mini-batch
    of 3d inputs

--- a/encoding/parallel.py
+++ b/encoding/parallel.py
@@ -101,8 +101,9 @@ class Broadcast(Function):
 class ModelDataParallel(Module):
    """Implements data parallelism at the module level.

-    Reference::
-        We provide this code for a comming paper.
+    Reference:
+
+        Hang Zhang, Kristin Dana, Jianping Shi, Zhongyue Zhang, Xiaogang Wang, Ambrish Tyagi, Amit Agrawal. “Context Encoding for Semantic Segmentation.” CVPR 2018

    This container parallelizes the application of the given module by
    splitting the input across the specified devices by chunking in the 
@@ -171,8 +172,9 @@ class CriterionDataParallel(Module):
    Calculate loss in multiple-GPUs, which balance the memory usage for 
    Semantic Segmentation.

-    Reference::
-        We provide this code for a comming paper.
+    Reference:
+ 
+        Hang Zhang, Kristin Dana, Jianping Shi, Zhongyue Zhang, Xiaogang Wang, Ambrish Tyagi, Amit Agrawal. “Context Encoding for Semantic Segmentation.” CVPR 2018

    The targets are splitted across the specified devices by chunking in
    the batch dimension. Please use together with :class:`encoding.parallel.ModelDataParallel`.
@@ -216,8 +218,9 @@ class CriterionDataParallel(Module):
 class SelfDataParallel(Module):
    """SelfDataParallel, please make sure you understand it before using.

-    Reference::
-        We provide this code for a comming paper.
+    Reference:
+
+        Hang Zhang, Kristin Dana, Jianping Shi, Zhongyue Zhang, Xiaogang Wang, Ambrish Tyagi, Amit Agrawal. “Context Encoding for Semantic Segmentation.” CVPR 2018

    Each module in the network should be in self-parallel mode, 
    which allows list of inputs from multiple GPUs.

--- a/encoding/syncbn.py
+++ b/encoding/syncbn.py
-##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
-## Created by: Hang Zhang
-## ECE Department, Rutgers University
-## Email: zhang.hang@rutgers.edu
-## Copyright (c) 2017
-##
-## This source code is licensed under the MIT-style license found in the
-## LICENSE file in the root directory of this source tree 
-##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
-
-import threading
-import torch
-import torch.nn as nn
-import torch.nn.functional as F
-from torch.autograd import Function, Variable
-from ._ext import encoding_lib
-
-class sum_square(Function):
-    r"""
-    Calculate sum of elements and sum of squares for Batch Normalization.
-    """
-    def forward(ctx, input):
-        ctx.save_for_backward(input)
-        B,C,H,W = input.size()
-        with torch.cuda.device_of(input):
-            xsum    = input.new().resize_(C).zero_()
-            xsquare = input.new().resize_(C).zero_()
-        if isinstance(input, torch.cuda.FloatTensor):
-            with torch.cuda.device_of(input):
-                encoding_lib.Encoding_Float_sum_square_Forward(
-                    input.view(B,C,-1), xsum, xsquare)
-        elif isinstance(input, torch.cuda.DoubleTensor):
-            with torch.cuda.device_of(input):
-                encoding_lib.Encoding_Double_sum_square_Forward( 
-                    input.view(B,C,-1), xsum, xsquare)
-        else:
-            raise RuntimeError('Unimplemented data type!') 
-        return xsum, xsquare
-
-    def backward(ctx, gradSum, gradSquare):
-        input, = ctx.saved_tensors
-        B,C,H,W = input.size()
-        with torch.cuda.device_of(input):
-            gradInput = input.new().resize_(B,C,H*W).zero_()
-        if isinstance(input, torch.cuda.FloatTensor):
-            with torch.cuda.device_of(input):
-                encoding_lib.Encoding_Float_sum_square_Backward(
-                    gradInput, input.view(B,C,-1), gradSum, gradSquare)
-        elif isinstance(input, torch.cuda.DoubleTensor):
-            with torch.cuda.device_of(input):
-                encoding_lib.Encoding_Double_sum_square_Backward( 
-                    gradInput, input.view(B,C,-1), gradSum, gradSquare)
-        else:
-            raise RuntimeError('Unimplemented data type!') 
-        return gradInput.view(B,C,H,W)
-
-
-class batchnormtrain(Function):
-    r"""Applies Batch Normalization over a 3d input that is seen as a
-    mini-batch.
-
-    .. _bencoding.atchnormtrain:
-
-    .. math::
-
-        y = \frac{x - \mu[x]}{ \sqrt{var[x] + \epsilon}} * \gamma + \beta
-
-    Shape:
-        - Input: :math:`(N, C)` or :math:`(N, C, L)`
-        - Output: :math:`(N, C)` or :math:`(N, C, L)` (same shape as input)
-
-    """
-    def forward(ctx, input, gamma, beta, mean, std):
-        ctx.save_for_backward(input, gamma, beta, mean, std)
-        assert(input.dim()==3)
-        with torch.cuda.device_of(input):
-            invstd = 1.0 / std
-            output = input.new().resize_as_(input)
-        if isinstance(input, torch.cuda.FloatTensor):
-            with torch.cuda.device_of(input):
-                encoding_lib.Encoding_Float_batchnorm_Forward(output, 
-                    input, mean, invstd, gamma, beta)
-        elif isinstance(input, torch.cuda.DoubleTensor):
-            with torch.cuda.device_of(input):
-                encoding_lib.Encoding_Double_batchnorm_Forward(output, 
-                    input, mean, invstd, gamma, beta)
-        else:
-            raise RuntimeError('Unimplemented data type!')
-        return output 
-
-    def backward(ctx, gradOutput):
-        input, gamma, beta, mean, std = ctx.saved_tensors
-        invstd = 1.0 / std
-        with torch.cuda.device_of(input):
-            gradInput = gradOutput.new().resize_as_(input).zero_()
-            gradGamma = gradOutput.new().resize_as_(gamma).zero_()
-            gradBeta  = gradOutput.new().resize_as_(beta).zero_()
-            gradMean  = gradOutput.new().resize_as_(mean).zero_()
-            gradStd   = gradOutput.new().resize_as_(std).zero_()
-
-        if isinstance(input, torch.cuda.FloatTensor):
-            with torch.cuda.device_of(input):
-                encoding_lib.Encoding_Float_batchnorm_Backward(
-                    gradOutput, input, gradInput, gradGamma, gradBeta, 
-                    mean, invstd, gamma, beta, gradMean, gradStd,
-                    True) 
-        elif isinstance(input, torch.cuda.DoubleTensor):
-            with torch.cuda.device_of(input):
-                encoding_lib.Encoding_Double_batchnorm_Backward(
-                    gradOutput, input, gradInput, gradGamma, gradBeta, 
-                    mean, invstd, gamma, beta, gradMean, gradStd,
-                    True) 
-        else:
-            raise RuntimeError('Unimplemented data type!')
-        return gradInput, gradGamma, gradBeta, gradMean, gradStd
-
-
-class batchnormeval(Function):
-    r"""Applies Batch Normalization over a 3d input that is seen as a
-    mini-batch.
-
-    Please see encoding.batchnormtrain_
-    """
-    def forward(ctx, input, gamma, beta, mean, std):
-        ctx.save_for_backward(input, gamma, beta, mean, std)
-        assert(input.dim()==3)
-        with torch.cuda.device_of(input):
-            invstd = 1.0 / std
-            output = input.new().resize_as_(input)
-        if isinstance(input, torch.cuda.FloatTensor):
-            with torch.cuda.device_of(input):
-                encoding_lib.Encoding_Float_batchnorm_Forward(output, 
-                    input, mean, invstd, gamma, beta)
-        elif isinstance(input, torch.cuda.DoubleTensor):
-            with torch.cuda.device_of(input):
-                encoding_lib.Encoding_Double_batchnorm_Forward(output, 
-                    input, mean, invstd, gamma, beta)
-        else:
-            raise RuntimeError('Unimplemented data type!')
-        return output 
-
-    def backward(ctx, gradOutput):
-        input, gamma, beta, mean, std = ctx.saved_tensors
-        invstd = 1.0 / std
-        with torch.cuda.device_of(input):
-            gradInput = gradOutput.new().resize_as_(input).zero_()
-            gradGamma = gradOutput.new().resize_as_(gamma).zero_()
-            gradBeta  = gradOutput.new().resize_as_(beta).zero_()
-            gradMean  = gradOutput.new().resize_as_(mean).zero_()
-            gradStd   = gradOutput.new().resize_as_(std).zero_()
-        if isinstance(input, torch.cuda.FloatTensor):
-            with torch.cuda.device_of(input):
-                encoding_lib.Encoding_Float_batchnorm_Backward(
-                    gradOutput, input, gradInput, gradGamma, gradBeta, 
-                    mean, invstd, gamma, beta, gradMean, gradStd,
-                    False) 
-        elif isinstance(input, torch.cuda.DoubleTensor):
-            with torch.cuda.device_of(input):
-                encoding_lib.Encoding_Double_batchnorm_Backward(
-                    gradOutput, input, gradInput, gradGamma, gradBeta, 
-                    mean, invstd, gamma, beta, gradMean, gradStd,
-                    False) 
-        else:
-            raise RuntimeError('Unimplemented data type!')
-        return gradInput, gradGamma, gradBeta, gradMean, gradStd
-
--- a/experiments/recognition/dataset/cifar10.py
+++ b/experiments/recognition/dataset/cifar10.py
+##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+## Created by: Hang Zhang
+## ECE Department, Rutgers University
+## Email: zhang.hang@rutgers.edu
+## Copyright (c) 2017
+##
+## This source code is licensed under the MIT-style license found in the
+## LICENSE file in the root directory of this source tree 
+##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+
+import torch
+import torchvision
+import torchvision.transforms as transforms
+
+class Dataloder():
+    """Builds the CIFAR-10 training and test ``DataLoader`` pair.
+
+    Args:
+        args: options object; ``args.batch_size`` sets the batch size of
+            both loaders and a truthy ``args.cuda`` enables 4 worker
+            processes with pinned memory.
+
+    Both splits are read from ``./data`` and requested with
+    ``download=True``.
+    """
+    def __init__(self, args):
+        # Per-channel mean / std handed to ``transforms.Normalize``.
+        channel_mean = (0.4914, 0.4822, 0.4465)
+        channel_std = (0.2023, 0.1994, 0.2010)
+
+        # Training images are augmented (padded random crop + flip);
+        # test images are only converted to tensors and normalised.
+        train_tf = transforms.Compose([
+            transforms.RandomCrop(32, padding=4),
+            transforms.RandomHorizontalFlip(),
+            transforms.ToTensor(),
+            transforms.Normalize(channel_mean, channel_std),
+        ])
+        test_tf = transforms.Compose([
+            transforms.ToTensor(),
+            transforms.Normalize(channel_mean, channel_std),
+        ])
+
+        train_data = torchvision.datasets.CIFAR10(
+            root='./data', train=True, download=True, transform=train_tf)
+        test_data = torchvision.datasets.CIFAR10(
+            root='./data', train=False, download=True, transform=test_tf)
+
+        # Worker processes and pinned memory are only requested with CUDA.
+        if args.cuda:
+            loader_opts = {'num_workers': 4, 'pin_memory': True}
+        else:
+            loader_opts = {}
+
+        self.trainloader = torch.utils.data.DataLoader(
+            train_data, batch_size=args.batch_size, shuffle=True,
+            **loader_opts)
+        self.testloader = torch.utils.data.DataLoader(
+            test_data, batch_size=args.batch_size, shuffle=False,
+            **loader_opts)
+
+    def getloader(self):
+        """Return the ``(trainloader, testloader)`` tuple."""
+        return self.trainloader, self.testloader
--- a/experiments/recognition/main.py
+++ b/experiments/recognition/main.py
@@ -72,7 +72,7 @@ def main():
            print("=> loaded checkpoint '{}' (epoch {})"
                .format(args.resume, checkpoint['epoch']))
        else:
-            print("=> no resume checkpoint found at '{}'".\
+            raise RuntimeError ("=> no resume checkpoint found at '{}'".\
                format(args.resume))
    scheduler = LR_Scheduler(args, len(train_loader))
    def train(epoch):
@@ -111,15 +111,16 @@ def main():
        for batch_idx, (data, target) in enumerate(tbar):
            if args.cuda:
                data, target = data.cuda(), target.cuda()
-            data, target = Variable(data, volatile=True), Variable(target)
-            output = model(data)
-            test_loss += criterion(output, target).data[0]
-            # get the index of the max log-probability
-            pred = output.data.max(1)[1] 
-            correct += pred.eq(target.data).cpu().sum()
-            total += target.size(0)
-
-            err = 100-100.*correct/total
+            data, target = Variable(data), Variable(target)
+            with torch.no_grad():
+                output = model(data)
+                test_loss += criterion(output, target).data.item()
+                # get the index of the max log-probability
+                pred = output.data.max(1)[1] 
+                correct += pred.eq(target.data).cpu().sum().item()
+                total += target.size(0)
+
+            err = 100-100.0*correct/total
            tbar.set_description('Loss: %.3f | Err: %.3f%% (%d/%d)'% \
                (test_loss/(batch_idx+1), err, total-correct, total))


--- a/experiments/recognition/model/download_cifar_models.sh
+++ b/experiments/recognition/model/download_cifar_models.sh
+# Download encnet_cifar.pth.tar into ./model (run from the directory that
+# contains model/). Stop if the directory is missing so the file is not
+# saved somewhere else and the trailing "cd .." does not move the caller.
+cd model || exit 1
+# The URL is quoted because an unquoted '?' is a shell glob character.
+wget -O encnet_cifar.pth.tar "https://www.dropbox.com/s/jgc9y8klfc5wecq/encnet_cifar.pth.tar?dl=1"
+cd ..
--- a/experiments/recognition/model/encnet.py
+++ b/experiments/recognition/model/encnet.py
+##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+## Created by: Hang Zhang
+## ECE Department, Rutgers University
+## Email: zhang.hang@rutgers.edu
+## Copyright (c) 2017
+##
+## This source code is licensed under the MIT-style license found in the
+## LICENSE file in the root directory of this source tree 
+##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+
+import torch
+from torch.autograd import Variable
+import torch.nn as nn
+from .mynn import EncBasicBlock
+import encoding
+
+class Net(nn.Module):
+    """Residual network assembled from ``EncBasicBlock`` units.
+
+    All layers are collected into a single ``nn.Sequential``: a 3x3 stem
+    convolution with BatchNorm/ReLU, three residual stages of two blocks
+    each (strides 1, 2, 2), then BatchNorm/ReLU, ``AvgPool2d(8)``, a
+    ``View`` reshape and a linear classifier.
+
+    Args:
+        args: options object; this class reads ``args.widen`` (the stem has
+            ``16 * widen`` channels), ``args.ncodes`` (forwarded to every
+            block) and ``args.nclass`` (size of the classifier output).
+    """
+    def __init__(self, args):
+        super(Net, self).__init__()
+        num_blocks=[2,2,2]
+        block=EncBasicBlock
+        # ``block`` is fixed to EncBasicBlock on the line above, so as
+        # written the expansion is always 1 and the else branch is not taken.
+        if block == EncBasicBlock:
+            self.expansion = 1
+        else:
+            self.expansion = 4
+
+        # Running channel count; updated by every _residual_unit call below.
+        self.inplanes = args.widen * 16
+        strides = [1, 2, 2]
+        model = []
+        # Conv_1
+        model += [nn.Conv2d(3, self.inplanes, kernel_size=3, padding=1),
+                  nn.BatchNorm2d(self.inplanes),
+                  nn.ReLU(inplace=True)]
+        # Residual units
+        model += [self._residual_unit(block, self.inplanes, num_blocks[0],
+                                      strides[0], args.ncodes)]
+        # Each later stage requests 2*inplanes/expansion planes, computed
+        # from the value of self.inplanes left by the previous stage, so
+        # the order of these calls matters.
+        for i in range(2):
+            model += [self._residual_unit(block, 
+                      int(2*self.inplanes/self.expansion), 
+                      num_blocks[i+1], strides[i+1], args.ncodes)]
+        # Last conv layer
+        # NOTE(review): AvgPool2d(8) followed by View(-1, inplanes)
+        # presumably expects an 8x8 feature map at this point (e.g. a 32x32
+        # input after the two stride-2 stages) -- confirm.
+        model += [nn.BatchNorm2d(self.inplanes),
+                  nn.ReLU(inplace=True),
+                  nn.AvgPool2d(8),
+                  encoding.nn.View(-1, self.inplanes),
+                  nn.Linear(self.inplanes, args.nclass)]
+
+        self.model = nn.Sequential(*model)
+
+    def _residual_unit(self, block, planes, n_blocks, stride, ncodes):
+        """Build one stage of ``n_blocks`` blocks as an ``nn.Sequential``.
+
+        The first block receives ``stride``; the remaining blocks use
+        stride 1. Side effect: ``self.inplanes`` is set to
+        ``self.expansion * planes`` after each block is created.
+        """
+        strides = [stride] + [1]*(n_blocks-1)
+        layers = []
+        for i in range(n_blocks):
+            layers += [block(self.inplanes, planes, strides[i], ncodes)]
+            self.inplanes = self.expansion*planes
+        return nn.Sequential(*layers)
+
+    def forward(self, input):
+        """Apply the sequential model to ``input`` and return its output."""
+        return self.model(input)
--- a/experiments/recognition/model/encnetdrop.py
+++ b/experiments/recognition/model/encnetdrop.py
+##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+## Created by: Hang Zhang
+## ECE Department, Rutgers University
+## Email: zhang.hang@rutgers.edu
+## Copyright (c) 2017
+##
+## This source code is licensed under the MIT-style license found in the
+## LICENSE file in the root directory of this source tree 
+##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+
+import torch
+from torch.autograd import Variable
+import torch.nn as nn
+from .mynn import EncBasicBlock, EncDropLayer
+import encoding
+
+class Net(nn.Module):
+    """Residual network of ``EncBasicBlock`` units built with ``EncDropLayer``.
+
+    All layers are collected into a single ``nn.Sequential``: a 3x3 stem
+    convolution with BatchNorm/ReLU, three residual stages of two blocks
+    each (strides 1, 2, 2), then BatchNorm/ReLU, ``AvgPool2d(8)``, a
+    ``View`` reshape and a linear classifier. Every block is constructed
+    with ``ELayer=EncDropLayer``.
+
+    Args:
+        args: options object; this class reads ``args.widen`` (the stem has
+            ``16 * widen`` channels), ``args.ncodes`` (forwarded to every
+            block) and ``args.nclass`` (size of the classifier output).
+    """
+    def __init__(self, args):
+        super(Net, self).__init__()
+        num_blocks=[2,2,2]
+        block=EncBasicBlock
+        # ``block`` is fixed to EncBasicBlock on the line above, so as
+        # written the expansion is always 1 and the else branch is not taken.
+        if block == EncBasicBlock:
+            self.expansion = 1
+        else:
+            self.expansion = 4
+
+        # Running channel count; updated by every _residual_unit call below.
+        self.inplanes = args.widen * 16
+        strides = [1, 2, 2]
+        model = []
+        # Conv_1
+        model += [nn.Conv2d(3, self.inplanes, kernel_size=3, padding=1),
+                  nn.BatchNorm2d(self.inplanes),
+                  nn.ReLU(inplace=True)]
+        # Residual units
+        model += [self._residual_unit(block, self.inplanes, num_blocks[0],
+                                      strides[0], args.ncodes)]
+        # Each later stage requests 2*inplanes/expansion planes, computed
+        # from the value of self.inplanes left by the previous stage, so
+        # the order of these calls matters.
+        for i in range(2):
+            model += [self._residual_unit(block, 
+                      int(2*self.inplanes/self.expansion), 
+                      num_blocks[i+1], strides[i+1], args.ncodes)]
+        # Last conv layer
+        # NOTE(review): AvgPool2d(8) followed by View(-1, inplanes)
+        # presumably expects an 8x8 feature map at this point (e.g. a 32x32
+        # input after the two stride-2 stages) -- confirm.
+        model += [nn.BatchNorm2d(self.inplanes),
+                  nn.ReLU(inplace=True),
+                  nn.AvgPool2d(8),
+                  encoding.nn.View(-1, self.inplanes),
+                  nn.Linear(self.inplanes, args.nclass)]
+
+        self.model = nn.Sequential(*model)
+
+    def _residual_unit(self, block, planes, n_blocks, stride, ncodes):
+        """Build one stage of ``n_blocks`` blocks as an ``nn.Sequential``.
+
+        The first block receives ``stride``; the remaining blocks use
+        stride 1, and each block is given ``ELayer=EncDropLayer``. Side
+        effect: ``self.inplanes`` is set to ``self.expansion * planes``
+        after each block is created.
+        """
+        strides = [stride] + [1]*(n_blocks-1)
+        layers = []
+        for i in range(n_blocks):
+            layers += [block(self.inplanes, planes, strides[i], ncodes, ELayer=EncDropLayer)]
+            self.inplanes = self.expansion*planes
+        return nn.Sequential(*layers)
+
+    def forward(self, input):
+        """Apply the sequential model to ``input`` and return its output."""
+        return self.model(input)
+
+def test(args=None):
+    """Smoke-test the network on one random 1x3x32x32 input (needs CUDA).
+
+    ``Net`` requires an options object, so a small default configuration
+    is built when ``args`` is omitted; the previous ``Net()`` call raised
+    ``TypeError`` before anything ran.
+
+    Args:
+        args: optional object exposing ``widen``, ``ncodes`` and ``nclass``.
+    """
+    if args is None:
+        from argparse import Namespace
+        # NOTE(review): illustrative smoke-test values -- align them with
+        # the training script's defaults if those differ.
+        args = Namespace(widen=1, ncodes=32, nclass=10)
+    net = Net(args).cuda()
+    print(net)
+    x = Variable(torch.randn(1,3,32,32)).cuda()
+    y = net(x)
+    print(y)
+    # Count parameters without shadowing the builtin ``sum``.
+    total_params = sum(param.nelement() for param in net.parameters())
+    print('Total params:', total_params)
+
+
+if __name__ == "__main__":
+    test()