Remove Duplicates
Hi,
I have a problem using the Remove Duplicates Operator. Here is my workflow:
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!-- Process chain: Read Excel, then Set Role, then Remove Duplicates. -->
<process version="5.0">
  <context>
    <input/>
    <output/>
    <macros/>
  </context>
  <operator activated="true" class="process" expanded="true" name="Process">
    <process expanded="true" height="646" width="1095">
      <!-- Step 1: load the test table from the Excel file. -->
      <operator activated="true"
                class="read_excel"
                expanded="true"
                height="60"
                name="Read Excel"
                width="90"
                x="45"
                y="30">
        <parameter key="excel_file" value="E:\RM\Test_Duplicates.xls"/>
        <list key="annotations"/>
      </operator>
      <!-- Step 2: give the CID attribute the "id" role. -->
      <operator activated="true"
                class="set_role"
                expanded="true"
                height="76"
                name="Set Role"
                width="90"
                x="179"
                y="30">
        <parameter key="name" value="CID"/>
        <parameter key="target_role" value="id"/>
      </operator>
      <!-- Step 3: Remove Duplicates with a single-attribute filter on CID;
           special attributes are included because CID now has the id role. -->
      <operator activated="true"
                class="remove_duplicates"
                expanded="true"
                height="76"
                name="Remove Duplicates"
                width="90"
                x="313"
                y="30">
        <parameter key="attribute_filter_type" value="single"/>
        <parameter key="attribute" value="CID"/>
        <parameter key="include_special_attributes" value="true"/>
      </operator>
      <!-- Wiring between the operators. Note: this snippet contains no connection
           from Remove Duplicates to a result port. -->
      <connect from_op="Read Excel"
               from_port="output"
               to_op="Set Role"
               to_port="example set input"/>
      <connect from_op="Set Role"
               from_port="example set output"
               to_op="Remove Duplicates"
               to_port="example set input"/>
      <portSpacing port="source_input 1" spacing="0"/>
      <portSpacing port="sink_result 1" spacing="0"/>
    </process>
  </operator>
</process>
And here is the table I am using to test it.
CID Value
3596 X
4054 X
4054 X
3000 S
32135 S
When I use it with invert selection to get only the duplicates, it gives me 2 examples which are not duplicates. Could someone tell me where I am going wrong?
Cheers,
Markus
I have a problem using the Remove Duplicates Operator. Here is my workflow:
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!-- Process chain: Read Excel, then Set Role, then Remove Duplicates. -->
<process version="5.0">
  <context>
    <input/>
    <output/>
    <macros/>
  </context>
  <operator activated="true" class="process" expanded="true" name="Process">
    <process expanded="true" height="646" width="1095">
      <!-- Step 1: load the test table from the Excel file. -->
      <operator activated="true"
                class="read_excel"
                expanded="true"
                height="60"
                name="Read Excel"
                width="90"
                x="45"
                y="30">
        <parameter key="excel_file" value="E:\RM\Test_Duplicates.xls"/>
        <list key="annotations"/>
      </operator>
      <!-- Step 2: give the CID attribute the "id" role. -->
      <operator activated="true"
                class="set_role"
                expanded="true"
                height="76"
                name="Set Role"
                width="90"
                x="179"
                y="30">
        <parameter key="name" value="CID"/>
        <parameter key="target_role" value="id"/>
      </operator>
      <!-- Step 3: Remove Duplicates with a single-attribute filter on CID;
           special attributes are included because CID now has the id role. -->
      <operator activated="true"
                class="remove_duplicates"
                expanded="true"
                height="76"
                name="Remove Duplicates"
                width="90"
                x="313"
                y="30">
        <parameter key="attribute_filter_type" value="single"/>
        <parameter key="attribute" value="CID"/>
        <parameter key="include_special_attributes" value="true"/>
      </operator>
      <!-- Wiring between the operators. Note: this snippet contains no connection
           from Remove Duplicates to a result port. -->
      <connect from_op="Read Excel"
               from_port="output"
               to_op="Set Role"
               to_port="example set input"/>
      <connect from_op="Set Role"
               from_port="example set output"
               to_op="Remove Duplicates"
               to_port="example set input"/>
      <portSpacing port="source_input 1" spacing="0"/>
      <portSpacing port="sink_result 1" spacing="0"/>
    </process>
  </operator>
</process>
And here is the table I am using to test it.
CID Value
3596 X
4054 X
4054 X
3000 S
32135 S
When I use it with invert selection to get only the duplicates, it gives me 2 examples which are not duplicates. Could someone tell me where I am going wrong?
Cheers,
Markus