<?xml version='1.0'?>
<!DOCTYPE art SYSTEM 'http://www.biomedcentral.com/xml/article.dtd'>
<art>
   <ui>1476-072X-8-54</ui>
   <ji>1476-072X</ji>
   <fm>
      <dochead>Research</dochead>
      <bibl>
         <title>
            <p>Evaluating geographic imputation approaches for zip code level data: an application to a study of pediatric diabetes</p>
         </title>
         <aug>
            <au id="A1">
               <snm>Hibbert</snm>
               <mi>D</mi>
               <fnm>James</fnm>
               <insr iid="I1"/>
               <email>hibbert@sc.edu</email>
            </au>
            <au ca="yes" id="A2">
               <snm>Liese</snm>
               <mi>D</mi>
               <fnm>Angela</fnm>
               <insr iid="I1"/>
               <email>liese@sc.edu</email>
            </au>
            <au id="A3">
               <snm>Lawson</snm>
               <fnm>Andrew</fnm>
               <insr iid="I2"/>
               <email>lawsonab@musc.edu</email>
            </au>
            <au id="A4">
               <snm>Porter</snm>
               <mi>E</mi>
               <fnm>Dwayne</fnm>
               <insr iid="I3"/>
               <email>porter@sc.edu</email>
            </au>
            <au id="A5">
               <snm>Puett</snm>
               <mi>C</mi>
               <fnm>Robin</fnm>
               <insr iid="I3"/>
               <insr iid="I4"/>
               <insr iid="I5"/>
               <email>rpuett@sc.edu</email>
            </au>
            <au id="A6">
               <snm>Standiford</snm>
               <fnm>Debra</fnm>
               <insr iid="I6"/>
               <email>Debbie.Standiford@cchmc.org</email>
            </au>
            <au id="A7">
               <snm>Liu</snm>
               <fnm>Lenna</fnm>
               <insr iid="I7"/>
               <email>lennall@u.washington.edu</email>
            </au>
            <au id="A8">
               <snm>Dabelea</snm>
               <fnm>Dana</fnm>
               <insr iid="I8"/>
               <email>Dana.Dabelea@ucdenver.edu</email>
            </au>
         </aug>
         <insg>
            <ins id="I1">
               <p>Department of Epidemiology and Biostatistics and Center for Research in Nutrition and Health Disparities, Arnold School of Public Health, University of South Carolina, 921 Assembly Street, Columbia, SC, USA</p>
            </ins>
            <ins id="I2">
               <p>Medical University of South Carolina College of Medicine, 135 Cannon Street, Suite 303, Charleston, SC, USA</p>
            </ins>
            <ins id="I3">
               <p>Department of Environmental Health Sciences, Arnold School of Public Health, University of South Carolina, 921 Assembly Street, Columbia, SC, USA</p>
            </ins>
            <ins id="I4">
               <p>Department of Epidemiology and Biostatistics, Arnold School of Public Health, University of South Carolina, 800 Sumter Street, Columbia, SC, USA</p>
            </ins>
            <ins id="I5">
               <p>South Carolina Cancer Prevention and Control Program, University of South Carolina, 915 Greene Street, Columbia, SC, USA</p>
            </ins>
            <ins id="I6">
               <p>Children's Hospital Medical Center, 3333 Burnet Avenue, Cincinnati, OH, USA</p>
            </ins>
            <ins id="I7">
               <p>University of Washington Child Health Institute, Seattle, WA, USA</p>
            </ins>
            <ins id="I8">
               <p>University of Colorado School of Public Health, 13001 East 17th Avenue, Denver, CO, USA</p>
            </ins>
         </insg>
         <source>International Journal of Health Geographics</source>
         <issn>1476-072X</issn>
         <pubdate>2009</pubdate>
         <volume>8</volume>
         <issue>1</issue>
         <fpage>54</fpage>
         <url>http://www.ij-healthgeographics.com/content/8/1/54</url>
         <xrefbib>
            <pubidlist>
               <pubid idtype="doi">10.1186/1476-072X-8-54</pubid>
               <pubid idtype="pmpid">19814809</pubid>
            </pubidlist>
         </xrefbib>
      </bibl>
      <history>
         <rec>
            <date>
               <day>27</day>
               <month>4</month>
               <year>2009</year>
            </date>
         </rec>
         <acc>
            <date>
               <day>8</day>
               <month>10</month>
               <year>2009</year>
            </date>
         </acc>
         <pub>
            <date>
               <day>8</day>
               <month>10</month>
               <year>2009</year>
            </date>
         </pub>
      </history>
      <cpyrt>
         <year>2009</year>
         <collab>Hibbert et al; licensee BioMed Central Ltd.</collab>
         <note>This is an Open Access article distributed under the terms of the Creative Commons Attribution License (<url>http://creativecommons.org/licenses/by/2.0</url>), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work is properly cited.</note>
      </cpyrt>
      <abs>
         <sec>
            <st>
               <p>Abstract</p>
            </st>
            <sec>
               <st>
                  <p>Background</p>
               </st>
               <p>There is increasing interest in the study of place effects on health, facilitated in part by geographic information systems. Incomplete or missing address information reduces geocoding success. Several geographic imputation methods have been suggested to overcome this limitation. Accuracy evaluation of these methods can be focused at the level of individuals and at higher group-levels (e.g., spatial distribution).</p>
            </sec>
            <sec>
               <st>
                  <p>Methods</p>
               </st>
               <p>We evaluated the accuracy of eight geo-imputation methods for address allocation from ZIP codes to census tracts at the individual and group level. The spatial apportioning approaches underlying the imputation methods included four fixed (deterministic) and four random (stochastic) allocation methods using land area, total population, population under age 20, and race/ethnicity as weighting factors. Data included more than 2,000 geocoded cases of diabetes mellitus among youth aged 0-19 in four U.S. regions. The imputed distribution of cases across tracts was compared to the true distribution using a chi-squared statistic.</p>
            </sec>
            <sec>
               <st>
                  <p>Results</p>
               </st>
               <p>At the individual level, population-weighted (total or under age 20) fixed allocation showed the greatest level of accuracy, with correct census tract assignments averaging 30.01% across all regions, followed by the race/ethnicity-weighted random method (23.83%). The true distribution of cases across census tracts was that 58.2% of tracts exhibited no cases, 26.2% had one case, 9.5% had two cases, and less than 3% had three or more. This distribution was best captured by random allocation methods, with no significant differences (p-value > 0.90). However, significant differences in distributions based on fixed allocation methods were found (p-value &lt; 0.0003).</p>
            </sec>
            <sec>
               <st>
                  <p>Conclusion</p>
               </st>
               <p>Fixed imputation methods seemed to yield greatest accuracy at the individual level, suggesting use for studies on area-level environmental exposures. Fixed methods result in artificial clusters in single census tracts. For studies focusing on spatial distribution of disease, random methods seemed superior, as they most closely replicated the true spatial distribution. When selecting an imputation approach, researchers should consider carefully the study aims.</p>
            </sec>
         </sec>
      </abs>
   </fm>
   <bdy>
      <sec>
         <st>
            <p>Background</p>
         </st>
         <p>There has long been recognition that place or geographic area can impact health behaviors and health outcomes <abbrgrp><abbr bid="B1">1</abbr><abbr bid="B2">2</abbr><abbr bid="B3">3</abbr><abbr bid="B4">4</abbr></abbrgrp>. The advent of geographic information system (GIS) technology and its widespread dissemination has enormously simplified the identification and characterization of place via address match geocoding, i.e. the assignment of geographic coordinates to a street address through interpolation based on a proportional distance between addresses in a record and an address range for a street segment <abbrgrp><abbr bid="B5">5</abbr></abbrgrp>.</p>
         <p>The validity of epidemiological studies involving geocoded data relies on the proportion of cases that can be geocoded and on the positional accuracy of the geocodes <abbrgrp><abbr bid="B6">6</abbr></abbrgrp>. Successful address match geocoding relies, in part, on the availability of complete and correct address information <abbrgrp><abbr bid="B2">2</abbr></abbrgrp>. However, address information in combination with health attributes is often considered protected health information under the Health Insurance Portability and Accountability Act (HIPAA). Thus only limited address information, such as a ZIP code, may be available for research <abbrgrp><abbr bid="B7">7</abbr></abbrgrp>.</p>
         <p>In the presence of missing or incomplete address data, investigators must decide whether to discard the incomplete data or, based on a variety of assumptions, allocate them to a representative location, e.g. a geometric center or centroid of the smallest geographic unit available, typically in the US, a ZIP code <abbrgrp><abbr bid="B8">8</abbr></abbrgrp>. Discarding incomplete data ensures a database with a high level of accuracy, but may result in a significant reduction in total cases available for analysis. Furthermore, if incompleteness of address data is associated with other attributes under study (i.e. if incomplete data are spatially correlated or predominantly located in rural areas), exclusion could lead to a geographic selection bias <abbrgrp><abbr bid="B8">8</abbr></abbrgrp>.</p>
         <p>Allocating cases to the smallest geographic unit available for all data points ensures that the database retains the maximum possible number of cases, although this method has several drawbacks. When allocated to the centroid of a geographic unit, cases may fall into uninhabited areas such as lakes or national parks. Also, the geographic units themselves may vary greatly in size and location over a short period of time, as has been shown for postal ZIP codes in the United States (U.S.) <abbrgrp><abbr bid="B9">9</abbr></abbrgrp>.</p>
         <p>Geo-imputation introduces a third option by using available address data in conjunction with assumptions based on available demographic or geographic data. Spatial apportionment of data has a long history of utilization in social sciences <abbrgrp><abbr bid="B10">10</abbr><abbr bid="B11">11</abbr><abbr bid="B12">12</abbr><abbr bid="B13">13</abbr><abbr bid="B14">14</abbr></abbrgrp>. More recently, geo-imputation has become popular in epidemiological studies for allocation of individual study participants to geographic units <abbrgrp><abbr bid="B15">15</abbr><abbr bid="B16">16</abbr></abbrgrp>. Very little is known, however, with respect to the accuracy of geographic imputation methods <abbrgrp><abbr bid="B17">17</abbr></abbrgrp>.</p>
         <p>The purpose of the current study was to evaluate the accuracy and utility of a variety of geo-imputation approaches for ZIP code data at the individual level (i.e. correct allocation of individual case to census tract) and at the group level (i.e. appropriate spatial distribution of cases across tracts). In the context of a project on the spatial epidemiology of diabetes, we used data from the SEARCH for Diabetes in Youth study <abbrgrp><abbr bid="B18">18</abbr></abbrgrp>. We also aimed to describe the data at hand with respect to address completeness and geocoding success.</p>
      </sec>
      <sec>
         <st>
            <p>Research methods</p>
         </st>
         <sec>
            <st>
               <p>Study Design</p>
            </st>
            <p>The present study was approved by Institutional Review Boards (IRB) from all participating entities and conducted using HIPAA compliant procedures. The SEARCH for Diabetes in Youth Study was initiated in 2000 to estimate the population prevalence and incidence of all types of diabetes in youth in the U.S. by age, gender, race/ethnicity, and diabetes type in four geographically defined populations and two membership/health-plan-based populations using consistent methodology for case ascertainment and classification <abbrgrp><abbr bid="B18">18</abbr></abbrgrp>. For the present study, data from the four geographically defined populations were included, which represent four distinct geographic U.S. regions of varying urban and rural characteristics, population densities, and socioeconomic status. Study sites included Colorado (all 64 counties), Ohio (six counties surrounding Cincinnati, OH, including two in Kentucky and one in Indiana), South Carolina (all 46 counties), and Washington (five counties surrounding Seattle, WA). The study areas varied widely with respect to urban and rural landscapes. Washington and Ohio were exclusively confined to the Seattle, WA and Cincinnati, OH areas respectively, which contained the highest mean population densities at the Census tract level per square kilometer (1379.22 and 1327.66 respectively). South Carolina contained the largest amount of rural landscape with a mean tract population density of 416.77 per square kilometer. The regional land area sizes varied from the 6,826 km<sup>2</sup> in the Ohio site to 269,736 km<sup>2</sup> in the Colorado site. Land area was calculated in ArcGIS 9.3 <abbrgrp><abbr bid="B19">19</abbr></abbrgrp> using an equal area projection.</p>
         </sec>
         <sec>
            <st>
               <p>Geocoding of data</p>
            </st>
            <p>The study population included 2,538 youth aged 0-19 years: 2,068 were cases diagnosed between 2002 and 2003 with type 1 or type 2 diabetes, and 470 were other diabetes cases that were part of a SEARCH case control study. Cases were geocoded based on street address (address matching), ZIP code, or county depending on the availability of address information. The 2000 TIGER (Topologically Integrated Geographic Encoding and Referencing) road network <abbrgrp><abbr bid="B20">20</abbr></abbrgrp> was used for geocoding in ArcGIS 9.3 <abbrgrp><abbr bid="B19">19</abbr></abbrgrp> and was complemented with ZIP Code Tabulation Areas (ZCTAs). The ZCTA was first used in the 2000 Census, and was created to overcome the difficulties in defining the land area encompassed by a ZIP code <abbrgrp><abbr bid="B20">20</abbr></abbrgrp>. ZCTAs are created through the aggregation of Census blocks into areas that most closely correspond with ZIP code areas <abbrgrp><abbr bid="B9">9</abbr></abbrgrp>.</p>
            <p>Due to Institutional Review Board (IRB) logistics, nearly 42% of the cases in the Washington site were restricted to ZIP code only (Table <tblr tid="T1">1</tblr>). A significant number of full addresses available in South Carolina could not be geocoded to the street address level, as these could not be located using 2000 TIGER. 2006 TIGER incorporated more recent changes in the road network, improving the geocoding effort. Thus, geocoding for South Carolina was completed using a combination of TIGER years. The 2000 TIGER centerlines were selected for geocoding in order to more closely match the years in which the case data were collected, as street names and ZIP codes may change frequently over time <abbrgrp><abbr bid="B9">9</abbr></abbrgrp>.</p>
            <tbl id="T1">
               <title>
                  <p>Table 1</p>
               </title>
               <caption>
                  <p>Data completeness and geocoding success by site</p>
               </caption>
               <tblbdy cols="5">
                  <r>
                     <c>
                        <p/>
                     </c>
                     <c ca="center">
                        <p>
                           <b>Colorado</b>
                        </p>
                     </c>
                     <c ca="center">
                        <p>
                           <b>Ohio</b>
                        </p>
                     </c>
                     <c ca="center">
                        <p>
                           <b>South Carolina</b>
                        </p>
                     </c>
                     <c ca="center">
                        <p>
                           <b>Washington</b>
                        </p>
                     </c>
                  </r>
                  <r>
                     <c>
                        <p/>
                     </c>
                     <c cspan="4">
                        <hr/>
                     </c>
                  </r>
                  <r>
                     <c ca="left">
                        <p>
                           <b>Total Cases</b>
                        </p>
                     </c>
                     <c ca="center">
                        <p>
                           <b>1003</b>
                        </p>
                     </c>
                     <c ca="center">
                        <p>
                           <b>360</b>
                        </p>
                     </c>
                     <c ca="center">
                        <p>
                           <b>666</b>
                        </p>
                     </c>
                     <c ca="center">
                        <p>
                           <b>509</b>
                        </p>
                     </c>
                  </r>
                  <r>
                     <c>
                        <p/>
                     </c>
                     <c ca="center">
                        <p>
                           <b>
                              <it>n (%)</it>
                           </b>
                        </p>
                     </c>
                     <c ca="center">
                        <p>
                           <b>
                              <it>n (%)</it>
                           </b>
                        </p>
                     </c>
                     <c ca="center">
                        <p>
                           <b>
                              <it>n (%)</it>
                           </b>
                        </p>
                     </c>
                     <c ca="center">
                        <p>
                           <b>
                              <it>n (%)</it>
                           </b>
                        </p>
                     </c>
                  </r>
                  <r>
                     <c cspan="5">
                        <hr/>
                     </c>
                  </r>
                  <r>
                     <c ca="left">
                        <p>Full Address Available</p>
                     </c>
                     <c ca="center">
                        <p>943 (94.0%)</p>
                     </c>
                     <c ca="center">
                        <p>333 (92.5%)</p>
                     </c>
                     <c ca="center">
                        <p>512 (76.9%)</p>
                     </c>
                     <c ca="center">
                        <p>295 (58.0%)</p>
                     </c>
                  </r>
                  <r>
                     <c ca="left">
                        <p>POBOX/RR Address</p>
                     </c>
                     <c ca="center">
                        <p>27 (2.7%)</p>
                     </c>
                     <c ca="center">
                        <p>2 (0.5%)</p>
                     </c>
                     <c ca="center">
                        <p>42 (6.4%)</p>
                     </c>
                     <c ca="center">
                        <p>5 (1.0%)</p>
                     </c>
                  </r>
                  <r>
                     <c ca="left">
                        <p>Missing Address (ZIP code only)</p>
                     </c>
                     <c ca="center">
                        <p>33 (3.3%)</p>
                     </c>
                     <c ca="center">
                        <p>25 (7%)</p>
                     </c>
                     <c ca="center">
                        <p>110 (16.7%)</p>
                     </c>
                     <c ca="center">
                        <p>209 (41.0%)</p>
                     </c>
                  </r>
                  <r>
                     <c ca="left">
                        <p>Geocoded Full Address</p>
                     </c>
                     <c ca="center">
                        <p>867 (86.4%)</p>
                     </c>
                     <c ca="center">
                        <p>322 (89.5%)</p>
                     </c>
                     <c ca="center">
                        <p>452 (67.9%)</p>
                     </c>
                     <c ca="center">
                        <p>290 (57.0%)</p>
                     </c>
                  </r>
               </tblbdy>
            </tbl>
         </sec>
         <sec>
            <st>
               <p>Data Cleaning and Quality</p>
            </st>
            <p>In a first step, topological anomalies in the ZCTA boundaries were removed. While the ZCTA files contain polygons for individual ZIP codes, water bodies and areas where no addressable postal locations existed were also contained in the file. Unlike other statistical entities from the Census, such as a tract or block group, ZCTAs do not necessarily require a contiguous boundary. This means that a given ZCTA may actually be composed of two or more noncontiguous polygons <abbrgrp><abbr bid="B20">20</abbr></abbrgrp>. These anomalies in the ZCTA boundaries file were dealt with using an approach similar to that taken by Grubesic and Matisziw <abbrgrp><abbr bid="B21">21</abbr></abbrgrp>, whereby polygons identified by the Census as water polygons and polygons containing no addresses were removed. ZCTAs composed of multiple polygons were dissolved into a single polygon based on a common ZIP code.</p>
         </sec>
         <sec>
            <st>
               <p>Calculation of Census Tract Weighting Factors</p>
            </st>
            <p>As shown in Figure <figr fid="F1">1</figr>, ZCTAs do not conform to census tract boundaries and generally cover a larger spatial area than a census tract. The proportion of overlap between the ZCTAs and the census tracts was utilized to obtain either land area-based or population-based weighting factors that were subsequently used in geo-imputation. Each ZCTA was subdivided by the tracts overlapped using geoprocessing components within a GIS. The geometric intersections of ZCTAs and tracts were computed and the tracts (or portions thereof) were joined with the attributes of the ZCTAs.</p>
            <fig id="F1">
               <title>
                  <p>Figure 1</p>
               </title>
               <caption>
                  <p>ZCTA and Census tract boundaries</p>
               </caption>
               <text>
                  <p><b>ZCTA and Census tract boundaries</b>.</p>
               </text>
               <graphic file="1476-072X-8-54-1"/>
            </fig>
            <p>For the calculation of land-area weighting factors, the land area of a given tract that overlapped with a ZCTA was obtained from geoprocessing output and expressed as the proportion of the total ZCTA area. Table <tblr tid="T2">2</tblr> illustrates this approach for ZCTA 29001 which contained five individual tracts. The weight is determined by dividing the land area of each tract within the ZCTA by the total ZCTA land area.</p>
            <tbl id="T2">
               <title>
                  <p>Table 2</p>
               </title>
               <caption>
                  <p>Weighting by land area</p>
               </caption>
               <tblbdy cols="5">
                  <r>
                     <c ca="center">
                        <p>
                           <b>ZCTA</b>
                        </p>
                     </c>
                     <c ca="center">
                        <p>
                           <b>ZCTA Area (km<sup><b>2</b></sup>)</b>
                        </p>
                     </c>
                     <c ca="center">
                        <p>
                           <b>Tract ID</b>
                        </p>
                     </c>
                     <c ca="center">
                        <p>
                           <b>Tract Area in ZCTA (km<sup><b>2</b></sup>)</b>
                        </p>
                     </c>
                     <c ca="center">
                        <p>
                           <b>Proportion Tract Area in ZCTA</b>
                        </p>
                     </c>
                  </r>
                  <r>
                     <c cspan="5">
                        <hr/>
                     </c>
                  </r>
                  <r>
                     <c ca="center">
                        <p>29001</p>
                     </c>
                     <c ca="center">
                        <p>202.54</p>
                     </c>
                     <c ca="center">
                        <p>T<sub>1</sub></p>
                     </c>
                     <c ca="center">
                        <p>171.37</p>
                     </c>
                     <c ca="center">
                        <p>0.84</p>
                     </c>
                  </r>
                  <r>
                     <c ca="center">
                        <p>29001</p>
                     </c>
                     <c ca="center">
                        <p>202.54</p>
                     </c>
                     <c ca="center">
                        <p>T<sub>2</sub></p>
                     </c>
                     <c ca="center">
                        <p>15.75</p>
                     </c>
                     <c ca="center">
                        <p>0.07</p>
                     </c>
                  </r>
                  <r>
                     <c ca="center">
                        <p>29001</p>
                     </c>
                     <c ca="center">
                        <p>202.54</p>
                     </c>
                     <c ca="center">
                        <p>T<sub>3</sub></p>
                     </c>
                     <c ca="center">
                        <p>9.77</p>
                     </c>
                     <c ca="center">
                        <p>0.05</p>
                     </c>
                  </r>
                  <r>
                     <c ca="center">
                        <p>29001</p>
                     </c>
                     <c ca="center">
                        <p>202.54</p>
                     </c>
                     <c ca="center">
                        <p>T<sub>4</sub></p>
                     </c>
                     <c ca="center">
                        <p>5.61</p>
                     </c>
                     <c ca="center">
                        <p>0.03</p>
                     </c>
                  </r>
                  <r>
                     <c ca="center">
                        <p>29001</p>
                     </c>
                     <c ca="center">
                        <p>202.54</p>
                     </c>
                     <c ca="center">
                        <p>T<sub>5</sub></p>
                     </c>
                     <c ca="center">
                        <p>0.03</p>
                     </c>
                     <c ca="center">
                        <p>0.01</p>
                     </c>
                  </r>
               </tblbdy>
            </tbl>
            <p>For the calculation of population-based weights, data were used from the block level Census Summary File 1 (SF1) <abbrgrp><abbr bid="B22">22</abbr></abbrgrp>. First, the total population was calculated for each ZCTA by summing the population estimates for all census blocks contained within a ZCTA. Blocks are contiguous with ZCTA boundaries (Figure <figr fid="F2">2</figr>) and were used to calculate census demographic data for each ZCTA and each tract proportion (Figure <figr fid="F3">3</figr>). Two types of population-based weights were investigated, based either on total population or on population 19 years or below. The population aged 0-19 was calculated using a summation of SF1 variables: male under 5 years (P012003), male 5 to 9 years (P012004), male 10 to 14 years (P012005), male 15 to 17 years (P012006), male 18 and 19 years (P012007), female under 5 years (P012027), female 5 to 9 years (P012028), female 10 to 14 years (P012029), female 15 to 17 years (P012030), and female 18 and 19 years (P012031). Total population was imported from variable P001001. Tract proportions containing zero population received a weight of zero (Figure <figr fid="F3">3</figr>) and were not considered in any population-weighted imputation.</p>
            <fig id="F2">
               <title>
                  <p>Figure 2</p>
               </title>
               <caption>
                  <p>Block centroids and tracts within a ZCTA</p>
               </caption>
               <text>
                  <p><b>Block centroids and tracts within a ZCTA</b>.</p>
               </text>
               <graphic file="1476-072X-8-54-2"/>
            </fig>
            <fig id="F3">
               <title>
                  <p>Figure 3</p>
               </title>
               <caption>
                  <p>Weighting of tracts within a ZCTA</p>
               </caption>
               <text>
                  <p><b>Weighting of tracts within a ZCTA</b>.</p>
               </text>
               <graphic file="1476-072X-8-54-3"/>
            </fig>
         </sec>
         <sec>
            <st>
               <p>Geo-imputation methods</p>
            </st>
            <p>Two general types of geo-imputation methods were evaluated including fixed (deterministic) and random (stochastic) geo-imputation approaches. For each of these, both population and area based weighting factors were applied.</p>
            <p>For the fixed allocation approaches, all cases within a ZCTA were allocated to the tract with the largest weighting factor as described above (i.e. area, total population, or total youth population weighting factor). These methods are abbreviated in the text and tables as a) <it>FixedArea</it>: Fixed area-weighted allocation; b) <it>FixedPop</it>: Fixed total population-weighted allocation; and c) <it>Fixed019</it>: Fixed population-weighted using 0-19 age group. In addition, we performed the most commonly used fixed allocation method which allocates a case to the ZIP centroid, which was designated d) <it>FixedZip</it>.</p>
            <p>The random allocation approaches used methods similar to those described by Henry and Boscoe <abbrgrp><abbr bid="B17">17</abbr></abbrgrp>. The weights obtained either from land-area or population-based calculations described above represented the chance of being allocated to a tract <abbrgrp><abbr bid="B17">17</abbr></abbrgrp>. For <it>n </it>tracts within a ZCTA, there will be <it>n </it>proportions. Let us assume that there are five tracts (<it>T</it><sub><it>1...5</it></sub>) overlapping ZCTA <it>Z</it><sub>1 </sub>and that their proportional contributions (for either area or population) are 0.84, 0.07, 0.05, 0.03 and 0.01 respectively. For each tract, a range is created using proportion weights (Table <tblr tid="T3">3</tblr>). Subsequently, a random number [0.0-1.0] is generated. Each case is then allocated to the tract whose range contains the random number. This approach results in the probability of an assignment in a particular tract being equivalent to the proportion of the metric being evaluated, e.g., area-weighted. For example, the range of T<sub>1 </sub>(Table <tblr tid="T3">3</tblr>) of 0.00-0.84 results in an 84% chance of a case being randomly assigned to T<sub>1</sub>.</p>
            <tbl id="T3">
               <title>
                  <p>Table 3</p>
               </title>
               <caption>
                  <p>Weighting and ranges for allocation to tracts</p>
               </caption>
               <tblbdy cols="5">
                  <r>
                     <c ca="center">
                        <p>
                           <b>Tract ID</b>
                        </p>
                     </c>
                     <c ca="center">
                        <p>
                           <b>Tract Area in ZCTA (km<sup><b>2</b></sup>)</b>
                        </p>
                     </c>
                     <c ca="center">
                        <p>
                           <b>Proportion Tract Area in ZCTA</b>
                        </p>
                     </c>
                     <c ca="center">
                        <p>
                           <b>Cumulative Proportion</b>
                        </p>
                     </c>
                     <c ca="center">
                        <p>
                           <b>Range</b>
                        </p>
                     </c>
                  </r>
                  <r>
                     <c cspan="5">
                        <hr/>
                     </c>
                  </r>
                  <r>
                     <c ca="center">
                        <p>T<sub>1</sub></p>
                     </c>
                     <c ca="center">
                        <p>171.37</p>
                     </c>
                     <c ca="center">
                        <p>0.84</p>
                     </c>
                     <c ca="center">
                        <p>0.84</p>
                     </c>
                     <c ca="center">
                        <p>0.00 - 0.84</p>
                     </c>
                  </r>
                  <r>
                     <c ca="center">
                        <p>T<sub>2</sub></p>
                     </c>
                     <c ca="center">
                        <p>15.75</p>
                     </c>
                     <c ca="center">
                        <p>0.07</p>
                     </c>
                     <c ca="center">
                        <p>0.91</p>
                     </c>
                     <c ca="center">
                        <p>0.84 - 0.91</p>
                     </c>
                  </r>
                  <r>
                     <c ca="center">
                        <p>T<sub>3</sub></p>
                     </c>
                     <c ca="center">
                        <p>9.77</p>
                     </c>
                     <c ca="center">
                        <p>0.05</p>
                     </c>
                     <c ca="center">
                        <p>0.96</p>
                     </c>
                     <c ca="center">
                        <p>0.91 - 0.96</p>
                     </c>
                  </r>
                  <r>
                     <c ca="center">
                        <p>T<sub>4</sub></p>
                     </c>
                     <c ca="center">
                        <p>5.61</p>
                     </c>
                     <c ca="center">
                        <p>0.03</p>
                     </c>
                     <c ca="center">
                        <p>0.99</p>
                     </c>
                     <c ca="center">
                        <p>0.96 - 0.99</p>
                     </c>
                  </r>
                  <r>
                     <c ca="center">
                        <p>T<sub>5</sub></p>
                     </c>
                     <c ca="center">
                        <p>0.03</p>
                     </c>
                     <c ca="center">
                        <p>0.01</p>
                     </c>
                     <c ca="center">
                        <p>1.00</p>
                     </c>
                     <c ca="center">
                        <p>0.99 - 1.00</p>
                     </c>
                  </r>
               </tblbdy>
            </tbl>
            <p>The random allocation methods are abbreviated in text and table as a) <it>RandArea</it>: Random area-weighted allocation; b) <it>RandPop</it>: Random total population-weighted allocation; c) <it>Rand019</it>: Random population-weighted using 0-19 year age group; and d) <it>RandRace019</it>: Random method using allocation by population distribution of 0-19 year old population by race/ethnicity. Race/ethnicity groups considered included non-Hispanic white, African American, Asian, Native American, and multi-ethnic/other. These categories represented all possible groups within the dataset.</p>
         </sec>
         <sec>
            <st>
               <p>Statistical methods</p>
            </st>
            <p>Data are presented descriptively as percentages and absolute numbers. Individual level accuracy assessments are represented as the percentage of cases allocated correctly to a tract through geo-imputation methods. The distribution of cases to tracts achieved by the allocation methods was compared to the true distribution using the Chi-square statistic.</p>
         </sec>
      </sec>
      <sec>
         <st>
            <p>Results</p>
         </st>
         <p>Address data characteristics and geocoding characteristics are summarized in Table <tblr tid="T1">1</tblr>. No site had complete address information for all cases, but both Colorado and Ohio had a markedly higher proportion of full addresses available than South Carolina and Washington, which were unable to obtain full addresses on a fraction of cases due to HIPAA related restrictions. An address is considered to be full if it contains a street number, street name, street type and ZIP code. South Carolina had a markedly higher number of addresses with PO Box or RR (rural route) designations. Both the Ohio and Colorado sites had the overall highest proportion of successfully geocoded addresses (CO = 86.4%, OH = 89.5%). The geocoding success rate (expressed as a proportion of full addresses available) was highly consistent across sites: 92% in Colorado, 97% in Ohio, 88% in South Carolina, and 98% in Washington.</p>
         <p>To evaluate the various geo-imputation methods, the dataset was limited to those cases with a geocoded full address (total 1,931 cases). Each of the eight allocation methods was applied to the site-specific data assuming that the only piece of address information available was a ZIP code (i.e. a worst case scenario) and then compared with the known, true location.</p>
         <p>Table <tblr tid="T4">4</tblr> summarizes the individual-level accuracy of the imputation approaches. The <it>Fixed019 </it>and <it>FixedPop </it>methods performed best at the individual level, with identical results in all sites except South Carolina. The proportion of cases correctly assigned to their census tract ranged from 23% to 37% across the sites (overall mean 30.26%). The commonly used <it>FixedZip </it>method, the <it>FixedArea </it>method and the <it>RandArea </it>method performed extremely poorly. The <it>RandRace019 </it>method saw a slight improvement when compared to the other random allocation methods at the individual level for three of the four sites. However, in the Washington site we observed a reduction in accuracy of roughly 4 percentage points with <it>RandRace019 </it>(18.30%, compared with 22.76% for <it>Rand019</it>).</p>
         <tbl id="T4">
            <title>
               <p>Table 4</p>
            </title>
            <caption>
               <p>Individual level accuracy of fixed and random geo-imputation methods by site</p>
            </caption>
            <tblbdy cols="9">
               <r>
                  <c>
                     <p/>
                  </c>
                  <c cspan="7" ca="center">
                     <p>
                        <b>Geo-imputation methods</b>
                     </p>
                  </c>
                  <c>
                     <p/>
                  </c>
               </r>
               <r>
                  <c>
                     <p/>
                  </c>
                  <c cspan="8">
                     <hr/>
                  </c>
               </r>
               <r>
                  <c>
                     <p/>
                  </c>
                  <c ca="center">
                     <p>
                        <b>FixedZip</b>
                     </p>
                  </c>
                  <c ca="center">
                     <p>
                        <b>FixedArea</b>
                     </p>
                  </c>
                  <c ca="center">
                     <p>
                        <b>FixedPop</b>
                     </p>
                  </c>
                  <c ca="center">
                     <p>
                        <b>Fixed019</b>
                     </p>
                  </c>
                  <c ca="center">
                     <p>
                        <b>RandArea</b>
                     </p>
                  </c>
                  <c ca="center">
                     <p>
                        <b>RandPop</b>
                     </p>
                  </c>
                  <c ca="center">
                     <p>
                        <b>Rand019</b>
                     </p>
                  </c>
                  <c ca="center">
                     <p>
                        <b>RandRace019</b>
                     </p>
                  </c>
               </r>
               <r>
                  <c>
                     <p/>
                  </c>
                  <c ca="center">
                     <p>
                        <b>
                           <it>%</it>
                        </b>
                     </p>
                  </c>
                  <c ca="center">
                     <p>
                        <b>
                           <it>%</it>
                        </b>
                     </p>
                  </c>
                  <c ca="center">
                     <p>
                        <b>
                           <it>%</it>
                        </b>
                     </p>
                  </c>
                  <c ca="center">
                     <p>
                        <b>
                           <it>%</it>
                        </b>
                     </p>
                  </c>
                  <c ca="center">
                     <p>
                        <b>
                           <it>%</it>
                        </b>
                     </p>
                  </c>
                  <c ca="center">
                     <p>
                        <b>
                           <it>%</it>
                        </b>
                     </p>
                  </c>
                  <c ca="center">
                     <p>
                        <b>
                           <it>%</it>
                        </b>
                     </p>
                  </c>
                  <c ca="center">
                     <p>
                        <b>
                           <it>%</it>
                        </b>
                     </p>
                  </c>
               </r>
               <r>
                  <c cspan="9">
                     <hr/>
                  </c>
               </r>
               <r>
                  <c ca="left">
                     <p>Colorado</p>
                  </c>
                  <c ca="center">
                     <p>16.77</p>
                  </c>
                  <c ca="center">
                     <p>14.44</p>
                  </c>
                  <c ca="center">
                     <p>23.03</p>
                  </c>
                  <c ca="center">
                     <p>23.03</p>
                  </c>
                  <c ca="center">
                     <p>13.94</p>
                  </c>
                  <c ca="center">
                     <p>21.11</p>
                  </c>
                  <c ca="center">
                     <p>19.80</p>
                  </c>
                  <c ca="center">
                     <p>21.40</p>
                  </c>
               </r>
               <r>
                  <c ca="left">
                     <p>Ohio</p>
                  </c>
                  <c ca="center">
                     <p>21.12</p>
                  </c>
                  <c ca="center">
                     <p>22.98</p>
                  </c>
                  <c ca="center">
                     <p>33.54</p>
                  </c>
                  <c ca="center">
                     <p>33.54</p>
                  </c>
                  <c ca="center">
                     <p>21.43</p>
                  </c>
                  <c ca="center">
                     <p>20.50</p>
                  </c>
                  <c ca="center">
                     <p>21.12</p>
                  </c>
                  <c ca="center">
                     <p>25.50</p>
                  </c>
               </r>
               <r>
                  <c ca="left">
                     <p>South Carolina</p>
                  </c>
                  <c ca="center">
                     <p>26.72</p>
                  </c>
                  <c ca="center">
                     <p>30.34</p>
                  </c>
                  <c ca="center">
                     <p>37.21</p>
                  </c>
                  <c ca="center">
                     <p>35.69</p>
                  </c>
                  <c ca="center">
                     <p>25.57</p>
                  </c>
                  <c ca="center">
                     <p>28.63</p>
                  </c>
                  <c ca="center">
                     <p>27.29</p>
                  </c>
                  <c ca="center">
                     <p>30.13</p>
                  </c>
               </r>
               <r>
                  <c ca="left">
                     <p>Washington</p>
                  </c>
                  <c ca="center">
                     <p>21.72</p>
                  </c>
                  <c ca="center">
                     <p>16.21</p>
                  </c>
                  <c ca="center">
                     <p>27.24</p>
                  </c>
                  <c ca="center">
                     <p>27.24</p>
                  </c>
                  <c ca="center">
                     <p>14.83</p>
                  </c>
                  <c ca="center">
                     <p>20.34</p>
                  </c>
                  <c ca="center">
                     <p>22.76</p>
                  </c>
                  <c ca="center">
                     <p>18.30</p>
                  </c>
               </r>
            </tblbdy>
         </tbl>
         <p>Results of the evaluation of group level accuracy are summarized in Additional File <supplr sid="S1">1</supplr>. The column entitled "<it>True</it>" lists the number of tracts that contain a given number of cases ranging from 0 to greater than 5. Given that diabetes in youth is a rare condition and our study was focused on incident cases, it was not surprising that across the entire study area more than 50% of all tracts did not contain a single case. In general, between 24 and 29% of tracts contained a single case with a sequentially decreasing proportion of tracts containing multiple cases. The remainder of the table describes the allocation of cases to tracts achieved by each of the eight imputation methods.</p>
         <suppl id="S1">
            <title>
               <p>Additional file 1</p>
            </title>
            <text>
               <p><b>Group level geo-imputation accuracy</b>. Table summarizing group level geo-imputation accuracy across all four study sites.</p>
            </text>
            <file name="1476-072X-8-54-S1.DOC">
               <p>Click here for file</p>
            </file>
         </suppl>
         <p>The distribution of cases across tracts was then compared using the Chi-square statistic (Table <tblr tid="T5">5</tblr>). Significant differences were observed between the distribution achieved by the <it>FixedZip</it>, <it>FixedArea</it>, <it>FixedPop </it>and <it>Fixed019 </it>imputation methods compared to the true distributions observed in our data. In contrast, none of the four random allocation methods seemed to differ significantly from true allocation, which suggests that these methods are superior to any of the fixed methods at the level of group accuracy. Both the <it>Rand019 </it>and <it>RandPop </it>methods performed similarly, with the youth population weighting being somewhat advantageous in South Carolina, Ohio and Washington.</p>
         <tbl id="T5">
            <title>
               <p>Table 5</p>
            </title>
            <caption>
               <p>Chi-square statistics associated with group level accuracy</p>
            </caption>
            <tblbdy cols="9">
               <r>
                  <c>
                     <p/>
                  </c>
                  <c cspan="7" ca="center">
                     <p>
                        <b>Geo-imputation methods</b>
                     </p>
                  </c>
                  <c>
                     <p/>
                  </c>
               </r>
               <r>
                  <c>
                     <p/>
                  </c>
                  <c cspan="8">
                     <hr/>
                  </c>
               </r>
               <r>
                  <c>
                     <p/>
                  </c>
                  <c ca="left">
                     <p>
                        <b>FixedZip</b>
                     </p>
                  </c>
                  <c ca="left">
                     <p>
                        <b>FixedArea</b>
                     </p>
                  </c>
                  <c ca="left">
                     <p>
                        <b>FixedPop</b>
                     </p>
                  </c>
                  <c ca="left">
                     <p>
                        <b>Fixed019</b>
                     </p>
                  </c>
                  <c ca="left">
                     <p>
                        <b>RandArea</b>
                     </p>
                  </c>
                  <c ca="left">
                     <p>
                        <b>RandPop</b>
                     </p>
                  </c>
                  <c ca="left">
                     <p>
                        <b>Rand019</b>
                     </p>
                  </c>
                  <c ca="left">
                     <p>
                        <b>RandRace019</b>
                     </p>
                  </c>
               </r>
               <r>
                  <c>
                     <p/>
                  </c>
                  <c>
                     <p/>
                  </c>
                  <c>
                     <p/>
                  </c>
                  <c>
                     <p/>
                  </c>
                  <c>
                     <p/>
                  </c>
                  <c>
                     <p/>
                  </c>
                  <c>
                     <p/>
                  </c>
                  <c>
                     <p/>
                  </c>
                  <c>
                     <p/>
                  </c>
               </r>
               <r>
                  <c ca="left">
                     <p>Colorado</p>
                  </c>
                  <c ca="left">
                     <p>399.4479</p>
                  </c>
                  <c ca="left">
                     <p>427.7909</p>
                  </c>
                  <c ca="left">
                     <p>388.6003</p>
                  </c>
                  <c ca="left">
                     <p>386.8368</p>
                  </c>
                  <c ca="left">
                     <p>7.5191</p>
                  </c>
                  <c ca="left">
                     <p>1.038</p>
                  </c>
                  <c ca="left">
                     <p>1.2907</p>
                  </c>
                  <c ca="left">
                     <p>3.7910</p>
                  </c>
               </r>
               <r>
                  <c>
                     <p/>
                  </c>
                  <c ca="left">
                     <p>p &lt; 0.0001</p>
                  </c>
                  <c ca="left">
                     <p>p &lt; 0.0001</p>
                  </c>
                  <c ca="left">
                     <p>p &lt; 0.0001</p>
                  </c>
                  <c ca="left">
                     <p>p &lt; 0.0001</p>
                  </c>
                  <c ca="left">
                     <p>p = 0.1848</p>
                  </c>
                  <c ca="left">
                     <p>p = 0.9594</p>
                  </c>
                  <c ca="left">
                     <p>p = 0.9359</p>
                  </c>
                  <c ca="left">
                     <p>p = 0.5799</p>
                  </c>
               </r>
               <r>
                  <c>
                     <p/>
                  </c>
                  <c>
                     <p/>
                  </c>
                  <c>
                     <p/>
                  </c>
                  <c>
                     <p/>
                  </c>
                  <c>
                     <p/>
                  </c>
                  <c>
                     <p/>
                  </c>
                  <c>
                     <p/>
                  </c>
                  <c>
                     <p/>
                  </c>
                  <c>
                     <p/>
                  </c>
               </r>
               <r>
                  <c ca="left">
                     <p>Ohio</p>
                  </c>
                  <c ca="left">
                     <p>141.5495</p>
                  </c>
                  <c ca="left">
                     <p>152.4194</p>
                  </c>
                  <c ca="left">
                     <p>139.2934</p>
                  </c>
                  <c ca="left">
                     <p>139.2934</p>
                  </c>
                  <c ca="left">
                     <p>3.0906</p>
                  </c>
                  <c ca="left">
                     <p>1.362</p>
                  </c>
                  <c ca="left">
                     <p>1.2907</p>
                  </c>
                  <c ca="left">
                     <p>1.8665</p>
                  </c>
               </r>
               <r>
                  <c>
                     <p/>
                  </c>
                  <c ca="left">
                     <p>p &lt; 0.0001</p>
                  </c>
                  <c ca="left">
                     <p>p &lt; 0.0001</p>
                  </c>
                  <c ca="left">
                     <p>p &lt; 0.0001</p>
                  </c>
                  <c ca="left">
                     <p>p &lt; 0.0001</p>
                  </c>
                  <c ca="left">
                     <p>p = 0.686</p>
                  </c>
                  <c ca="left">
                     <p>p = 0.9594</p>
                  </c>
                  <c ca="left">
                     <p>p = 0.9359</p>
                  </c>
                  <c ca="left">
                     <p>p = 0.8673</p>
                  </c>
               </r>
               <r>
                  <c>
                     <p/>
                  </c>
                  <c>
                     <p/>
                  </c>
                  <c>
                     <p/>
                  </c>
                  <c>
                     <p/>
                  </c>
                  <c>
                     <p/>
                  </c>
                  <c>
                     <p/>
                  </c>
                  <c>
                     <p/>
                  </c>
                  <c>
                     <p/>
                  </c>
                  <c>
                     <p/>
                  </c>
               </r>
               <r>
                  <c ca="left">
                     <p>South Carolina</p>
                  </c>
                  <c ca="left">
                     <p>146.8333</p>
                  </c>
                  <c ca="left">
                     <p>141.8189</p>
                  </c>
                  <c ca="left">
                     <p>149.1956</p>
                  </c>
                  <c ca="left">
                     <p>143.6908</p>
                  </c>
                  <c ca="left">
                     <p>4.3042</p>
                  </c>
                  <c ca="left">
                     <p>7.6184</p>
                  </c>
                  <c ca="left">
                     <p>1.7513</p>
                  </c>
                  <c ca="left">
                     <p>1.0542</p>
                  </c>
               </r>
               <r>
                  <c>
                     <p/>
                  </c>
                  <c ca="left">
                     <p>p &lt; 0.0001</p>
                  </c>
                  <c ca="left">
                     <p>p &lt; 0.0001</p>
                  </c>
                  <c ca="left">
                     <p>p &lt; 0.0001</p>
                  </c>
                  <c ca="left">
                     <p>p &lt; 0.0001</p>
                  </c>
                  <c ca="left">
                     <p>p = 0.5065</p>
                  </c>
                  <c ca="left">
                     <p>p = 0.1786</p>
                  </c>
                  <c ca="left">
                     <p>p = 0.8824</p>
                  </c>
                  <c ca="left">
                     <p>p = 0.9580</p>
                  </c>
               </r>
               <r>
                  <c>
                     <p/>
                  </c>
                  <c>
                     <p/>
                  </c>
                  <c>
                     <p/>
                  </c>
                  <c>
                     <p/>
                  </c>
                  <c>
                     <p/>
                  </c>
                  <c>
                     <p/>
                  </c>
                  <c>
                     <p/>
                  </c>
                  <c>
                     <p/>
                  </c>
                  <c>
                     <p/>
                  </c>
               </r>
               <r>
                  <c ca="left">
                     <p>Washington</p>
                  </c>
                  <c ca="left">
                     <p>146.5466</p>
                  </c>
                  <c ca="left">
                     <p>23.6656</p>
                  </c>
                  <c ca="left">
                     <p>22.777</p>
                  </c>
                  <c ca="left">
                     <p>129.8429</p>
                  </c>
                  <c ca="left">
                     <p>1.4884</p>
                  </c>
                  <c ca="left">
                     <p>1.1255</p>
                  </c>
                  <c ca="left">
                     <p>0.2134</p>
                  </c>
                  <c ca="left">
                     <p>3.8452</p>
                  </c>
               </r>
               <r>
                  <c>
                     <p/>
                  </c>
                  <c ca="left">
                     <p>p &lt; 0.0001</p>
                  </c>
                  <c ca="left">
                     <p>p = 0.0003</p>
                  </c>
                  <c ca="left">
                     <p>p = 0.0004</p>
                  </c>
                  <c ca="left">
                     <p>p &lt; 0.0001</p>
                  </c>
                  <c ca="left">
                     <p>p = 0.9144</p>
                  </c>
                  <c ca="left">
                     <p>p = 0.9518</p>
                  </c>
                  <c ca="left">
                     <p>p = 0.999</p>
                  </c>
                  <c ca="left">
                     <p>p = 0.5719</p>
                  </c>
               </r>
            </tblbdy>
         </tbl>
      </sec>
      <sec>
         <st>
            <p>Discussion</p>
         </st>
         <p>The individual level accuracy of eight imputation methods was assessed for over 2,000 cases of diabetes across four U.S. regions. This study is among the few to determine accuracy of geo-imputation methods using collected clinical data that had been geocoded through HIPAA compliant procedures. The vast majority of published epidemiologic work to date that has dealt with incomplete address information has reported allocating missing data to ZIP code centroid <abbrgrp><abbr bid="B9">9</abbr><abbr bid="B23">23</abbr><abbr bid="B24">24</abbr></abbrgrp>. This can be problematic as ZIP codes are less spatiotemporally stable than Census statistical areas such as tracts or block groups <abbrgrp><abbr bid="B9">9</abbr></abbrgrp>. Investigators should pay particular attention when comparing identical ZIP codes from datasets that are temporally dissimilar.</p>
         <p>At the level of individual assignment, fixed population-weighted methods showed a mean accuracy of 30.26% (Min 23.03%, Max 37.21% using total population weight) and 30.45% (Min 23.03%, Max 37.98% using youth population ages 0-19 weight). Although these geo-imputation methods led to a disproportionate number of cases allocated to a single tract within a ZCTA, instances exist where this method would be useful. Heavily urbanized residential areas with high population density will contain tracts and ZCTAs smaller in land area and simplify distance calculation to exposure sites <abbrgrp><abbr bid="B25">25</abbr></abbrgrp>.</p>
         <p>Although the individual case accuracy of the random methods was lower than fixed methods, randomization allowed for each tract in a ZCTA to have a chance of a case being allocated to it. This allowed for a distribution more closely approximating that seen in reality (i.e. the <it>True </it>column in Additional File <supplr sid="S1">1</supplr>). Randomized allocation applied to the youth population from Census SF1 was found to provide the best approximation of the true distribution of cases within census tracts for all sites.</p>
         <p>Individual accuracy of all methods varied geographically. Colorado results were lowest for most of the eight methods. Colorado comprised the largest total land area and South Carolina was the least densely populated of the four sites. Tract size for Colorado was also largest, averaging 254 km<sup>2</sup>. Interestingly, it was anticipated that sites containing tracts of smaller land area would achieve the highest accuracy, with Washington and Ohio being smallest with average tract areas of 29.53 km<sup>2 </sup>and 26.14 km<sup>2</sup> respectively. However, South Carolina (average tract area 92.32 km<sup>2</sup>) results were consistently highest across all eight methods, with Ohio and Washington being 2<sup>nd </sup>or 3<sup>rd </sup>when comparing each method's accuracy across sites (Table <tblr tid="T4">4</tblr>).</p>
         <p>Compared to the fixed allocation methods, random population-weighted methods showed a mean accuracy of 22.64% at the individual level (Min 20.34%, Max 28.63% using total population weights), 21.07% (Min 17.47%, Max 26.72% using youth population ages 0-19 weights) and 23.83% (Min 18.30%, Max 30.13%) using youth population and race/ethnicity. Henry and Boscoe <abbrgrp><abbr bid="B17">17</abbr></abbrgrp> saw a similar accuracy of 25.9% using total population as a weighting mechanism.</p>
         <p>At the level of group accuracy, the <it>RandPop </it>and <it>Rand019 </it>methods performed similarly across all sites except Colorado, where <it>RandPop </it>(p = 0.9594) was slightly better than <it>Rand019 </it>(p = 0.9359), and South Carolina, where <it>RandRace019 </it>performed best (p = 0.9580). This may be due in part to both the rural nature of South Carolina, and to the larger number of people over 65, particularly within coastal areas. <it>RandArea </it>performed the poorest across all sites when compared to the true distribution. To the best of our knowledge, this is the first paper to evaluate the ability of geo-imputation approaches to approximate distribution of cases across space.</p>
         <p>In our study geography, a ZIP code overlapped with a median number of 4 (minimum 1, maximum 29) Census tracts. This relationship in fact sets a sort of upper limit on the individual-level accuracy of any imputation method, because as the number of tracts per ZIP code increases, the likelihood of correct assignment of an individual decreases, hence, the low overall magnitude of the individual level accuracy of the geo-imputation methods. Furthermore, this relationship between ZIP codes and tracts is likely responsible for the fact that in our data, the fixed allocation methods performed better than any of the random allocation methods at the individual level.</p>
         <p>Henry and Boscoe <abbrgrp><abbr bid="B17">17</abbr></abbrgrp> showed that weighting using multiple covariates such as race/ethnicity in addition to age achieves higher accuracy. Correspondingly, we refined the weighting using the population of youth aged 0-19 years by additionally considering the race/ethnic composition of the population of youth. Consistent with previous findings, this approach produced a slight increase in accuracy in the Colorado, Ohio, and South Carolina study sites at the individual level. However, the Washington site experienced a 4% drop in accuracy when accounting for race. It is conceivable that in the Washington site, both the lower levels of residential racial segregation in urban Seattle plus the larger ethnic and multi-racial diversity of the Seattle population contribute to the loss in specificity of an assignment, thereby increasing inaccuracy.</p>
         <p>It is important to note that the geo-imputation methods shown were conducted entirely within the GIS framework and utilized custom tools developed to handle the random allocation and extend the capabilities of the GIS. Although it is entirely possible to use purely statistical allocation, GIS was essential to both the rapid implementation of the geo-imputation methods as well as the weighting calculations, particularly the area-based weights. Investigators wishing to use geo-imputation methods should take into account the benefits offered in these software packages. Investigators may contact the author to obtain the tool created to perform the geo-imputations presented in this paper.</p>
         <p>It has been well established that geocoding success rate can differ significantly with respect to urban and rural areas and can be seen as being correlated with population density <abbrgrp><abbr bid="B6">6</abbr><abbr bid="B25">25</abbr><abbr bid="B26">26</abbr></abbrgrp>. Since address match geocoding is accomplished through interpolation along a street segment, a longer segment common to rural areas may introduce greater error. Furthermore, addresses drawn from rural areas are more likely to contain PO Boxes or Rural Routes as address information, confounding the geocoding process <abbrgrp><abbr bid="B27">27</abbr></abbrgrp>.</p>
         <p>A fundamental, very conservative assumption of the present analysis is that a ZIP code is the only address portion available for the entire data set. In many instances geo-imputation would only be applied to the non-geocodable subset of the addresses. Addresses lacking other portions of a geocodable address (in this case, street number, street name, street type) would likely produce different results using these imputation methods. Furthermore, geo-imputation cannot fully compensate for low-quality address data, although it can provide a valuable solution in instances where an analysis will be conducted at spatial units smaller than those available for all cases. Other methods such as dasymetric mapping <abbrgrp><abbr bid="B28">28</abbr><abbr bid="B29">29</abbr></abbrgrp>, manual intervention/interactive geocoding or re-coding using a different geocoding strategy may in some instances be preferable <abbrgrp><abbr bid="B30">30</abbr></abbrgrp>.</p>
         <p>Although ZCTAs are used by the Census to represent the land area covered by a ZIP code, investigators must consider the potential for spatiotemporal mismatch of current ZIP codes to Census derived ZCTAs <abbrgrp><abbr bid="B9">9</abbr></abbrgrp>. Since the primary function of ZIP codes is to aid the USPS in efficient mail delivery, it is necessary that ZIP codes be updated frequently between Census dates to reflect changes in population and the changes may not be well documented <abbrgrp><abbr bid="B7">7</abbr></abbrgrp>.</p>
      </sec>
      <sec>
         <st>
            <p>Conclusion</p>
         </st>
         <p>In summary, our evaluation of geo-imputation approaches for ZIP code level data indicates that while fixed imputation methods yield the greatest accuracy at the individual level, random methods most closely replicate the true distribution of locations across space. Our study illustrates the wide range of geo-imputation approaches that may be considered above and beyond the commonly used ZIP code centroid method. It remains up to the investigator to fully understand the implications of handling missing address data with the methods available and to carefully consider the purpose of the study when selecting an imputation approach.</p>
      </sec>
      <sec>
         <st>
            <p>Competing interests</p>
         </st>
         <p>The authors declare that they have no competing interests.</p>
      </sec>
      <sec>
         <st>
            <p>Authors' contributions</p>
         </st>
         <p>JH acquired the data, analyzed and interpreted the data, drafted and revised the manuscript.</p>
         <p>ALi contributed to acquiring the data and design of the study and drafted parts of the manuscript and made critical revisions to the manuscript. ALa made contributions to conception and design and revision of the manuscript. DP made contributions to conception and design and revision of the manuscript. RP contributed to the analysis and interpretation of the data and revisions of the manuscript. DS, LL, and DD contributed to acquiring the data and critical revision of the manuscript. All authors read and approved the final manuscript.</p>
      </sec>
   </bdy>
   <bm>
      <ack>
         <sec>
            <st>
               <p>Acknowledgements</p>
            </st>
            <p>We would like to thank the SEARCH investigators, staff and participants for making this project possible.</p>
            <p>The project was supported by Award Number R01DK077131 from the National Institute of Diabetes and Digestive and Kidney Diseases. The content is solely the responsibility of the authors and does not necessarily represent the official views of the National Institute of Diabetes and Digestive and Kidney Diseases or the National Institutes of Health.</p>
         </sec>
      </ack>
      <refgrp>
         <bibl id="B1">
            <aug>
               <au>
                  <snm>Snow</snm>
                  <fnm>J</fnm>
               </au>
            </aug>
            <source>On the Mode of Communication of Cholera</source>
            <publisher>London: Churchill</publisher>
            <pubdate>1855</pubdate>
         </bibl>
         <bibl id="B2">
            <aug>
               <au>
                  <snm>Cromley</snm>
                  <fnm>EK</fnm>
               </au>
               <au>
                  <snm>McLafferty</snm>
                  <fnm>SL</fnm>
               </au>
            </aug>
            <source>GIS and Public Health</source>
            <publisher>New York: Guilford Press</publisher>
            <pubdate>2002</pubdate>
         </bibl>
         <bibl id="B3">
            <aug>
               <au>
                  <snm>Gatrell</snm>
                  <fnm>A</fnm>
               </au>
            </aug>
            <source>Geographies of Health</source>
            <publisher>Malden, MA: Blackwell</publisher>
            <pubdate>2002</pubdate>
         </bibl>
         <bibl id="B4">
            <aug>
               <au>
                  <snm>Lawson</snm>
                  <fnm>AB</fnm>
               </au>
            </aug>
            <source>Statistical Methods in Spatial Epidemiology</source>
            <publisher>New York: Wiley</publisher>
            <edition>2</edition>
            <pubdate>2006</pubdate>
         </bibl>
         <bibl id="B5">
            <title>
               <p>Statistical methods for incompletely and incorrectly geocoded cancer data</p>
            </title>
            <aug>
               <au>
                  <snm>Zimmerman</snm>
                  <fnm>DL</fnm>
               </au>
            </aug>
            <source>Geocoding Health Data: The Use of Geographic Codes in Cancer Prevention and Control, Research and Practice</source>
            <publisher>Boca Raton, Florida: CRC Press</publisher>
            <editor>Rushton G, Armstrong MP, Gittler J, Greene BR, Pavlik CE, West MM, Zimmerman DL</editor>
            <pubdate>2007</pubdate>
         </bibl>
         <bibl id="B6">
            <title>
               <p>Positional accuracy of geocoded addresses in epidemiologic research</p>
            </title>
            <aug>
               <au>
                  <snm>Bonner</snm>
                  <fnm>MR</fnm>
               </au>
               <au>
                  <snm>Han</snm>
                  <fnm>D</fnm>
               </au>
               <au>
                  <snm>Nie</snm>
                  <fnm>J</fnm>
               </au>
               <au>
                  <snm>Rogerson</snm>
                  <fnm>P</fnm>
               </au>
               <au>
                  <snm>Vena</snm>
                  <fnm>JE</fnm>
               </au>
               <au>
                  <snm>Freudenheim</snm>
                  <fnm>JL</fnm>
               </au>
            </aug>
            <source>Epidemiology</source>
            <pubdate>2003</pubdate>
            <volume>14</volume>
            <fpage>408</fpage>
            <lpage>412</lpage>
            <xrefbib>
               <pubid idtype="pmpid" link="fulltext">12843763</pubid>
            </xrefbib>
         </bibl>
         <bibl id="B7">
            <title>
               <p>Geocoding in cancer research: a review</p>
            </title>
            <aug>
               <au>
                  <snm>Rushton</snm>
                  <fnm>G</fnm>
               </au>
               <au>
                  <snm>Armstrong</snm>
                  <fnm>MP</fnm>
               </au>
               <au>
                  <snm>Gittler</snm>
                  <fnm>J</fnm>
               </au>
               <au>
                  <snm>Greene</snm>
                  <fnm>BR</fnm>
               </au>
               <au>
                  <snm>Pavlik</snm>
                  <fnm>CE</fnm>
               </au>
               <au>
                  <snm>West</snm>
                  <fnm>MM</fnm>
               </au>
               <au>
                  <snm>Zimmerman</snm>
                  <fnm>D</fnm>
               </au>
            </aug>
            <source>Am J Prev Med</source>
            <pubdate>2006</pubdate>
            <volume>30</volume>
            <fpage>S16</fpage>
            <lpage>S24</lpage>
            <xrefbib>
               <pubidlist>
                  <pubid idtype="doi">10.1016/j.amepre.2005.09.011</pubid>
                  <pubid idtype="pmpid" link="fulltext">16458786</pubid>
               </pubidlist>
            </xrefbib>
         </bibl>
         <bibl id="B8">
            <aug>
               <au>
                  <snm>Rushton</snm>
                  <fnm>G</fnm>
               </au>
               <au>
                  <snm>Armstrong</snm>
                  <fnm>MP</fnm>
               </au>
               <au>
                  <snm>Gittler</snm>
                  <fnm>J</fnm>
               </au>
               <au>
                  <snm>Greene</snm>
                  <fnm>BR</fnm>
               </au>
               <au>
                  <snm>Pavlik</snm>
                  <fnm>CE</fnm>
               </au>
               <au>
                  <snm>West</snm>
                  <fnm>MM</fnm>
               </au>
               <au>
                  <snm>Zimmerman</snm>
                  <fnm>DL</fnm>
               </au>
            </aug>
            <source>Geocoding Health Data: The Use of Geographic Codes in Cancer Prevention and Control</source>
            <publisher>Boca Raton, FL: CRC Press</publisher>
            <pubdate>2007</pubdate>
         </bibl>
         <bibl id="B9">
            <title>
               <p>Zip code caveat: bias due to spatiotemporal mismatches between zip codes and US census-defined geographic areas--the Public Health Disparities Geocoding Project</p>
            </title>
            <aug>
               <au>
                  <snm>Krieger</snm>
                  <fnm>N</fnm>
               </au>
               <au>
                  <snm>Waterman</snm>
                  <fnm>P</fnm>
               </au>
               <au>
                  <snm>Chen</snm>
                  <fnm>JT</fnm>
               </au>
               <au>
                  <snm>Soobader</snm>
                  <fnm>MJ</fnm>
               </au>
               <au>
                  <snm>Subramanian</snm>
                  <fnm>SV</fnm>
               </au>
               <au>
                  <snm>Carson</snm>
                  <fnm>R</fnm>
               </au>
            </aug>
            <source>Am J Public Health</source>
            <pubdate>2002</pubdate>
            <volume>92</volume>
            <fpage>1100</fpage>
            <lpage>1102</lpage>
            <xrefbib>
               <pubidlist>
                  <pubid idtype="doi">10.2105/AJPH.92.7.1100</pubid>
                  <pubid idtype="pmcid">1447194</pubid>
                  <pubid idtype="pmpid" link="fulltext">12084688</pubid>
               </pubidlist>
            </xrefbib>
         </bibl>
         <bibl id="B10">
            <title>
               <p>Reassessing Racial and Socioeconomic Disparities in Environmental Justice Research</p>
            </title>
            <aug>
               <au>
                  <snm>Mohai</snm>
                  <fnm>P</fnm>
               </au>
               <au>
                  <snm>Saha</snm>
                  <fnm>R</fnm>
               </au>
            </aug>
            <source>Demography</source>
            <pubdate>2006</pubdate>
            <volume>43</volume>
            <fpage>2</fpage>
            <xrefbib>
               <pubid idtype="doi">10.1353/dem.2006.0017</pubid>
            </xrefbib>
         </bibl>
         <bibl id="B11">
            <title>
               <p>A spatial evaluation of socio demographics surrounding National Priorities List sites in Florida using a distance-based approach</p>
            </title>
            <aug>
               <au>
                  <snm>Kearney</snm>
                  <fnm>G</fnm>
               </au>
               <au>
                  <snm>Kiros</snm>
                  <fnm>G</fnm>
               </au>
            </aug>
            <source>International Journal of Health Geographics</source>
            <pubdate>2009</pubdate>
            <volume>8</volume>
            <fpage>33</fpage>
            <xrefbib>
               <pubidlist>
                  <pubid idtype="doi">10.1186/1476-072X-8-33</pubid>
                  <pubid idtype="pmcid">2708146</pubid>
                  <pubid idtype="pmpid" link="fulltext">19531266</pubid>
               </pubidlist>
            </xrefbib>
         </bibl>
         <bibl id="B12">
            <title>
               <p>When census geography doesn't work: Using ancillary information to improve the spatial interpolation of demographic data</p>
            </title>
            <aug>
               <au>
                  <snm>Voss</snm>
                  <fnm>P</fnm>
               </au>
               <au>
                  <snm>Long</snm>
                  <fnm>D</fnm>
               </au>
               <au>
                  <snm>Hammer</snm>
                  <fnm>R</fnm>
               </au>
            </aug>
            <publisher>Center for Demography and Ecology, University of Wisconsin, Madison</publisher>
            <pubdate>1999</pubdate>
         </bibl>
         <bibl id="B13">
            <title>
               <p>Measurement of spatial equity</p>
            </title>
            <aug>
               <au>
                  <snm>Truelove</snm>
                  <fnm>M</fnm>
               </au>
            </aug>
            <source>Environment and Planning C: Government and Policy</source>
            <pubdate>1993</pubdate>
            <volume>11</volume>
            <fpage>1</fpage>
            <xrefbib>
               <pubid idtype="doi">10.1068/c110019</pubid>
            </xrefbib>
         </bibl>
         <bibl id="B14">
            <title>
               <p>From here to there: Methods of allocating data between census geography and socially meaningful areas</p>
            </title>
            <aug>
               <au>
                  <snm>Saporito</snm>
                  <fnm>S</fnm>
               </au>
               <au>
                  <snm>Chavers</snm>
                  <fnm>JM</fnm>
               </au>
               <au>
                  <snm>Nixon</snm>
                  <fnm>LC</fnm>
               </au>
               <au>
                  <snm>McQuiddy</snm>
                  <fnm>MR</fnm>
               </au>
            </aug>
            <source>Social Science Research</source>
            <pubdate>2007</pubdate>
            <volume>36</volume>
            <fpage>3</fpage>
            <xrefbib>
               <pubid idtype="doi">10.1016/j.ssresearch.2006.05.004</pubid>
            </xrefbib>
         </bibl>
         <bibl id="B15">
            <title>
               <p>Missing stage and grade in Maryland prostate cancer surveillance data, 1992-1997</p>
            </title>
            <aug>
               <au>
                  <snm>Klassen</snm>
                  <fnm>AC</fnm>
               </au>
               <au>
                  <snm>Curriero</snm>
                  <fnm>F</fnm>
               </au>
               <au>
                  <snm>Kulldorff</snm>
                  <fnm>M</fnm>
               </au>
               <au>
                  <snm>Alberg</snm>
                  <fnm>AJ</fnm>
               </au>
               <au>
                  <snm>Platz</snm>
                  <fnm>EA</fnm>
               </au>
               <au>
                  <snm>Neloms</snm>
                  <fnm>ST</fnm>
               </au>
            </aug>
            <source>Am J Prev Med</source>
            <pubdate>2006</pubdate>
            <volume>30</volume>
            <fpage>S77</fpage>
            <lpage>S87</lpage>
            <xrefbib>
               <pubidlist>
                  <pubid idtype="doi">10.1016/j.amepre.2005.09.010</pubid>
                  <pubid idtype="pmpid" link="fulltext">16458794</pubid>
               </pubidlist>
            </xrefbib>
         </bibl>
         <bibl id="B16">
            <title>
               <p>The geographic distribution of breast cancer incidence in Massachusetts 1988 to 1997, adjusted for covariates</p>
            </title>
            <aug>
               <au>
                  <snm>Sheehan</snm>
                  <fnm>JT</fnm>
               </au>
               <au>
                  <snm>DeChello</snm>
                  <fnm>LM</fnm>
               </au>
               <au>
                  <snm>Kulldorff</snm>
                  <fnm>M</fnm>
               </au>
               <au>
                  <snm>Gregorio</snm>
                  <fnm>DI</fnm>
               </au>
               <au>
                  <snm>Gershman</snm>
                  <fnm>S</fnm>
               </au>
               <au>
                  <snm>Mroszczyk</snm>
                  <fnm>M</fnm>
               </au>
            </aug>
            <source>International Journal of Health Geographics</source>
            <pubdate>2004</pubdate>
            <volume>3</volume>
            <fpage>17</fpage>
            <xrefbib>
               <pubidlist>
                  <pubid idtype="doi">10.1186/1476-072X-3-17</pubid>
                  <pubid idtype="pmcid">514716</pubid>
                  <pubid idtype="pmpid" link="fulltext">15291960</pubid>
               </pubidlist>
            </xrefbib>
         </bibl>
         <bibl id="B17">
            <title>
               <p>Estimating the accuracy of geographical imputation</p>
            </title>
            <aug>
               <au>
                  <snm>Henry</snm>
                  <fnm>KA</fnm>
               </au>
               <au>
                  <snm>Boscoe</snm>
                  <fnm>FP</fnm>
               </au>
            </aug>
            <source>International Journal of Health Geographics</source>
            <pubdate>2008</pubdate>
            <volume>7</volume>
            <fpage>3</fpage>
            <xrefbib>
               <pubidlist>
                  <pubid idtype="doi">10.1186/1476-072X-7-3</pubid>
                  <pubid idtype="pmcid">2266732</pubid>
                  <pubid idtype="pmpid" link="fulltext">18215308</pubid>
               </pubidlist>
            </xrefbib>
         </bibl>
         <bibl id="B18">
            <title>
               <p>SEARCH for Diabetes in Youth: a multicenter study of the prevalence, incidence and classification of diabetes mellitus in youth</p>
            </title>
            <aug>
               <au>
                  <cnm>SEARCH Study Group</cnm>
               </au>
            </aug>
            <source>Control Clin Trials</source>
            <pubdate>2004</pubdate>
            <volume>25</volume>
            <fpage>458</fpage>
            <lpage>471</lpage>
            <xrefbib>
               <pubidlist>
                  <pubid idtype="doi">10.1016/j.cct.2004.08.002</pubid>
                  <pubid idtype="pmpid" link="fulltext">15465616</pubid>
               </pubidlist>
            </xrefbib>
         </bibl>
         <bibl id="B19">
            <source>ArcGIS 9.3</source>
            <publisher>Redlands, CA: Environmental Systems Research Institute (ESRI)</publisher>
            <pubdate>2008</pubdate>
         </bibl>
         <bibl id="B20">
            <title>
               <p>Census 2000 ZIP Code Tabulation Areas Technical Documentation</p>
            </title>
            <aug>
               <au>
                  <cnm>US Census Bureau</cnm>
               </au>
            </aug>
            <url>http://www.census.gov/geo/ZCTA/zcta_tech_doc.pdf</url>
         </bibl>
         <bibl id="B21">
            <title>
               <p>On the use of ZIP codes and ZIP code tabulation areas (ZCTAs) for the spatial analysis of epidemiological data</p>
            </title>
            <aug>
               <au>
                  <snm>Grubesic</snm>
                  <fnm>TH</fnm>
               </au>
               <au>
                  <snm>Matisziw</snm>
                  <fnm>TC</fnm>
               </au>
            </aug>
            <source>Int J Health Geogr</source>
            <pubdate>2006</pubdate>
            <volume>5</volume>
            <fpage>58</fpage>
            <xrefbib>
               <pubidlist>
                  <pubid idtype="doi">10.1186/1476-072X-5-58</pubid>
                  <pubid idtype="pmcid">1762013</pubid>
                  <pubid idtype="pmpid" link="fulltext">17166283</pubid>
               </pubidlist>
            </xrefbib>
         </bibl>
         <bibl id="B22">
            <title>
               <p>Census 2000 Summary File 1, Census of Population and Housing</p>
            </title>
            <aug>
               <au>
                  <cnm>US Census Bureau</cnm>
               </au>
            </aug>
            <publisher>Washington, DC: US Bureau of the Census</publisher>
            <pubdate>2001</pubdate>
         </bibl>
         <bibl id="B23">
            <title>
               <p>The distribution of pollution: Community characteristics and exposure to air toxics</p>
            </title>
            <aug>
               <au>
                  <snm>Brooks</snm>
                  <fnm>N</fnm>
               </au>
               <au>
                  <snm>Sethi</snm>
                  <fnm>R</fnm>
               </au>
            </aug>
            <source>Journal of Environmental Economics and Management</source>
            <pubdate>1997</pubdate>
            <volume>32</volume>
            <fpage>233</fpage>
            <lpage>250</lpage>
            <xrefbib>
               <pubid idtype="doi">10.1006/jeem.1996.0967</pubid>
            </xrefbib>
         </bibl>
         <bibl id="B24">
            <title>
               <p>Using ZIP Codes as Geocodes in Cancer Research</p>
            </title>
            <aug>
               <au>
                  <snm>Beyer</snm>
                  <fnm>KMM</fnm>
               </au>
               <au>
                  <snm>Schultz</snm>
                  <fnm>AF</fnm>
               </au>
               <au>
                  <snm>Rushton</snm>
                  <fnm>G</fnm>
               </au>
            </aug>
            <source>Geocoding Health Data: The Use of Geographic Codes in Cancer Prevention and Control, Research and Practice</source>
            <publisher>Boca Raton, Florida: CRC Press</publisher>
            <editor>Rushton G, Armstrong MP, Gittler J, Greene BR, Pavlik CE, West MM, Zimmerman DL</editor>
            <pubdate>2007</pubdate>
         </bibl>
         <bibl id="B25">
            <title>
               <p>Positional error in automated geocoding of residential addresses</p>
            </title>
            <aug>
               <au>
                  <snm>Cayo</snm>
                  <fnm>MR</fnm>
               </au>
               <au>
                  <snm>Talbot</snm>
                  <fnm>TO</fnm>
               </au>
            </aug>
            <source>Int J Health Geogr</source>
            <pubdate>2003</pubdate>
            <volume>2</volume>
            <fpage>10</fpage>
            <xrefbib>
               <pubidlist>
                  <pubid idtype="doi">10.1186/1476-072X-2-10</pubid>
                  <pubid idtype="pmcid">324564</pubid>
                  <pubid idtype="pmpid" link="fulltext">14687425</pubid>
               </pubidlist>
            </xrefbib>
         </bibl>
         <bibl id="B26">
            <title>
               <p>Positional accuracy of two methods of geocoding</p>
            </title>
            <aug>
               <au>
                  <snm>Ward</snm>
                  <fnm>M</fnm>
               </au>
               <au>
                  <snm>Nuckols</snm>
                  <fnm>J</fnm>
               </au>
               <au>
                  <snm>Giglierano</snm>
                  <fnm>J</fnm>
               </au>
               <au>
                  <snm>Bonner</snm>
                  <fnm>M</fnm>
               </au>
               <au>
                  <snm>Wolter</snm>
                  <fnm>C</fnm>
               </au>
               <au>
                  <snm>Airola</snm>
                  <fnm>M</fnm>
               </au>
               <au>
                  <snm>Mix</snm>
                  <fnm>W</fnm>
               </au>
               <au>
                  <snm>Colt</snm>
                  <fnm>J</fnm>
               </au>
               <au>
                  <snm>Hartge</snm>
                  <fnm>P</fnm>
               </au>
            </aug>
            <source>Epidemiology</source>
            <pubdate>2005</pubdate>
            <volume>16</volume>
            <fpage>4</fpage>
            <xrefbib>
               <pubidlist>
                  <pubid idtype="doi">10.1097/01.ede.0000147106.32027.3e</pubid>
                  <pubid idtype="pmpid" link="fulltext">15613939</pubid>
               </pubidlist>
            </xrefbib>
         </bibl>
         <bibl id="B27">
            <title>
               <p>Post Office Box addresses: A challenge for Geographic Information System-based studies</p>
            </title>
            <aug>
               <au>
                  <snm>Hurley</snm>
                  <fnm>S</fnm>
               </au>
               <au>
                  <snm>Saunders</snm>
                  <fnm>T</fnm>
               </au>
               <au>
                  <snm>Nivas</snm>
                  <fnm>R</fnm>
               </au>
               <au>
                  <snm>Hertz</snm>
                  <fnm>A</fnm>
               </au>
               <au>
                  <snm>Reynolds</snm>
                  <fnm>P</fnm>
               </au>
            </aug>
            <source>Epidemiology</source>
            <pubdate>2003</pubdate>
            <volume>14</volume>
            <fpage>4</fpage>
         </bibl>
         <bibl id="B28">
            <title>
               <p>Dasymetric Mapping and Areal Interpolation: Implementation and Evaluation</p>
            </title>
            <aug>
               <au>
                  <snm>Eicher</snm>
                  <fnm>CL</fnm>
               </au>
               <au>
                  <snm>Brewer</snm>
                  <fnm>CA</fnm>
               </au>
            </aug>
            <source>Cartography and Geographic Information Science</source>
            <pubdate>2001</pubdate>
         </bibl>
         <bibl id="B29">
            <title>
               <p>Dasymetric Estimation of Population Density and Areal Interpolation of Census Data</p>
            </title>
            <aug>
               <au>
                  <snm>Holt</snm>
                  <fnm>JB</fnm>
               </au>
               <au>
                  <snm>Lo</snm>
                  <fnm>CP</fnm>
               </au>
               <au>
                  <snm>Hodler</snm>
                  <fnm>TW</fnm>
               </au>
            </aug>
            <source>Cartography and Geographic Information Science</source>
            <pubdate>2004</pubdate>
            <volume>31</volume>
            <fpage>2</fpage>
            <xrefbib>
               <pubid idtype="doi">10.1559/1523040041649407</pubid>
            </xrefbib>
         </bibl>
         <bibl id="B30">
            <title>
               <p>An effective and efficient approach for manually improving geocoded data</p>
            </title>
            <aug>
               <au>
                  <snm>Goldberg</snm>
                  <fnm>DW</fnm>
               </au>
               <au>
                  <snm>Wilson</snm>
                  <fnm>JP</fnm>
               </au>
               <au>
                  <snm>Knoblock</snm>
                  <fnm>CA</fnm>
               </au>
               <au>
                  <snm>Ritz</snm>
                  <fnm>B</fnm>
               </au>
               <au>
                  <snm>Cockburn</snm>
                  <fnm>MG</fnm>
               </au>
            </aug>
            <source>Int J Health Geogr</source>
            <pubdate>2008</pubdate>
            <volume>7</volume>
            <fpage>60</fpage>
            <xrefbib>
               <pubidlist>
                  <pubid idtype="doi">10.1186/1476-072X-7-60</pubid>
                  <pubid idtype="pmcid">2612650</pubid>
                  <pubid idtype="pmpid" link="fulltext">19032791</pubid>
               </pubidlist>
            </xrefbib>
         </bibl>
      </refgrp>
   </bm>
</art>

