Hello,
I have a dataset that contains longitude and latitude coordinates along with some other attributes. I also have a second dataset that contains Zip Codes with their centroids (also given as longitude and latitude). For each example in the first dataset, I need to identify the Zip Code in the second dataset whose centroid is nearest. In other words, I need to use the good ol' distance formula, SQRT( (Lon2 - Lon1)^2 + (Lat2 - Lat1)^2 ), to calculate the distance from each example in the first dataset to each example in the second dataset, and then return the Zip Code associated with the minimum distance for each example in the first dataset (adding it as a new attribute). Below is what I have so far (not much). I've assigned a macro to each Longitude and each Latitude value in the Zip Code dataset. I was thinking that I could use a Loop Examples operator with an internal Generate Attributes operator to calculate the distances for each example in the first dataset. Unfortunately, I can't find a way to base the calculation on every set of coordinates from the second dataset. Any help on this would be greatly appreciated.
Thanks,
Damian
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<process version="5.1.003">
<!-- Process context: no input bindings, no output bindings and no macros
     are declared for this process. -->
<context>
  <input/>
  <output/>
  <macros/>
</context>
<operator activated="true" class="process" compatibility="5.1.003" expanded="true" name="Process">
<process expanded="true" height="569" width="925">
<!-- "Data Set": generated data that appears to stand in for the first dataset
     described in the post (the points that need a ZIP code assigned).
     Two attributes are generated, both bounded to [-176.7, 70.7]. The number
     of examples is not set here, so the operator's default applies. -->
<operator name="Data Set" class="generate_data" compatibility="5.1.003"
          activated="true" expanded="true"
          x="45" y="30" width="90" height="60">
  <parameter key="number_of_attributes" value="2"/>
  <parameter key="attributes_lower_bound" value="-176.7"/>
  <parameter key="attributes_upper_bound" value="70.7"/>
</operator>
<!-- "Rename": gives the generated columns of "Data Set" descriptive names.
     label becomes DataValue, att1 becomes Latitude, att2 becomes Longitude. -->
<operator name="Rename" class="rename" compatibility="5.1.003"
          activated="true" expanded="true"
          x="179" y="30" width="90" height="76">
  <parameter key="old_name" value="label"/>
  <parameter key="new_name" value="DataValue"/>
  <list key="rename_additional_attributes">
    <parameter key="att1" value="Latitude"/>
    <parameter key="att2" value="Longitude"/>
  </list>
</operator>
<!-- "Set Role": targets the attribute DataValue; no additional roles are listed.
     NOTE(review): no target role parameter is given here, so whatever the
     operator's default role is will be applied. Confirm that this is intended. -->
<operator name="Set Role" class="set_role" compatibility="5.1.003"
          activated="true" expanded="true"
          x="179" y="75" width="90" height="76">
  <parameter key="name" value="DataValue"/>
  <list key="set_additional_roles"/>
</operator>
<!-- "Zip Code DB": generated data that appears to stand in for the ZIP code
     centroid table described in the post. 180 examples with two attributes,
     both bounded to [-176.7, 70.7]. -->
<operator name="Zip Code DB" class="generate_data" compatibility="5.1.003"
          activated="true" expanded="true"
          x="45" y="210" width="90" height="60">
  <parameter key="number_examples" value="180"/>
  <parameter key="number_of_attributes" value="2"/>
  <parameter key="attributes_lower_bound" value="-176.7"/>
  <parameter key="attributes_upper_bound" value="70.7"/>
</operator>
<!-- "Generate Attributes": adds a ZIP attribute to the generated centroid table,
     computed as round(rand()*(70000-1)+1). These are random placeholder codes,
     not real ZIP codes. Assuming rand() yields values between 0 and 1, the
     result is a whole number between 1 and 70000 (TODO confirm rand() range). -->
<operator name="Generate Attributes" class="generate_attributes" compatibility="5.1.003"
          activated="true" expanded="true"
          x="179" y="210" width="90" height="76">
  <list key="function_descriptions">
    <parameter key="ZIP" value="round(rand()*(70000-1)+1)"/>
  </list>
</operator>
<!-- "Rename (2)": names the centroid coordinates of the ZIP table.
     att1 becomes Latitude, att2 becomes Longitude, matching the attribute
     names used for the first dataset. -->
<operator name="Rename (2)" class="rename" compatibility="5.1.003"
          activated="true" expanded="true"
          x="179" y="255" width="90" height="76">
  <parameter key="old_name" value="att1"/>
  <parameter key="new_name" value="Latitude"/>
  <list key="rename_additional_attributes">
    <parameter key="att2" value="Longitude"/>
  </list>
</operator>
<!-- "Select Attributes": filters on the single attribute "label" with the
     selection inverted, so every attribute except "label" is kept.
     include_special_attributes is switched on; presumably this is required
     because "label" carries a special role in the generated data (confirm). -->
<operator name="Select Attributes" class="select_attributes" compatibility="5.1.003"
          activated="true" expanded="true"
          x="179" y="300" width="90" height="76">
  <parameter key="attribute_filter_type" value="single"/>
  <parameter key="attribute" value="label"/>
  <parameter key="invert_selection" value="true"/>
  <parameter key="include_special_attributes" value="true"/>
</operator>
<!-- "Loop Examples": receives the ZIP table (output of "Select Attributes") and
     runs the nested process below, which chains two Extract Macro operators.
     Together they store one row's Latitude and Longitude in macros named
     Lat%{example} and Lon%{example}.
     NOTE(review): %{example} is presumably the loop's iteration macro; its name
     is not set on this operator, so the operator default applies. Confirm. -->
<operator name="Loop Examples" class="loop_examples" compatibility="5.1.003"
          activated="true" expanded="true"
          x="313" y="300" width="90" height="76">
  <process expanded="true" height="551" width="870">
    <!-- Copies the Latitude data value of the example at index %{example}
         into the macro Lat%{example}. -->
    <operator name="Extract Macro" class="extract_macro" compatibility="5.1.003"
              activated="true" expanded="true"
              x="45" y="30" width="90" height="60">
      <parameter key="macro" value="Lat%{example}"/>
      <parameter key="macro_type" value="data_value"/>
      <parameter key="attribute_name" value="Latitude"/>
      <parameter key="example_index" value="%{example}"/>
    </operator>
    <!-- Copies the Longitude data value of the same example
         into the macro Lon%{example}. -->
    <operator name="Extract Macro (2)" class="extract_macro" compatibility="5.1.003"
              activated="true" expanded="true"
              x="179" y="30" width="90" height="60">
      <parameter key="macro" value="Lon%{example}"/>
      <parameter key="macro_type" value="data_value"/>
      <parameter key="attribute_name" value="Longitude"/>
      <parameter key="example_index" value="%{example}"/>
    </operator>
    <!-- Inner wiring: loop input, then both Extract Macro steps in sequence,
         then back out through the loop's example set port. -->
    <connect from_port="example set"
             to_op="Extract Macro" to_port="example set"/>
    <connect from_op="Extract Macro" from_port="example set"
             to_op="Extract Macro (2)" to_port="example set"/>
    <connect from_op="Extract Macro (2)" from_port="example set"
             to_port="example set"/>
    <portSpacing port="source_example set" spacing="0"/>
    <portSpacing port="sink_example set" spacing="0"/>
    <portSpacing port="sink_output 1" spacing="0"/>
  </process>
</operator>
<!-- Top-level wiring, branch 1 (first dataset):
     Data Set, then Rename, then Set Role.
     Nothing is connected downstream of Set Role in this process. -->
<connect from_op="Data Set" from_port="output"
         to_op="Rename" to_port="example set input"/>
<connect from_op="Rename" from_port="example set output"
         to_op="Set Role" to_port="example set input"/>
<!-- Top-level wiring, branch 2 (ZIP centroid table):
     Zip Code DB, then Generate Attributes, then Rename (2),
     then Select Attributes, then Loop Examples.
     No connection to a result port is defined, so neither branch is
     delivered as a process result. -->
<connect from_op="Zip Code DB" from_port="output"
         to_op="Generate Attributes" to_port="example set input"/>
<connect from_op="Generate Attributes" from_port="example set output"
         to_op="Rename (2)" to_port="example set input"/>
<connect from_op="Rename (2)" from_port="example set output"
         to_op="Select Attributes" to_port="example set input"/>
<connect from_op="Select Attributes" from_port="example set output"
         to_op="Loop Examples" to_port="example set"/>
<!-- Layout hints for the process-level ports. -->
<portSpacing port="source_input 1" spacing="0"/>
<portSpacing port="sink_result 1" spacing="0"/>
</process>
</operator>
</process>