DBScan epsilon parameter cannot be logged with version 5.1
Hello all,
I'm trying to investigate how DBScan behaves as epsilon and min_points are varied, but I noticed warnings when the process attempts to log the min_points parameter of the DBScan operator.
"No such parameter in 'com.rapidminer.parameter.ParameterTypeValue$OperatorValueSelection@1bfa971b"
And sure enough, there are missing values in the log output.
I just upgraded to 5.1 and I'm fairly sure this worked with the last 5.0 version.
I'm working on a macro driven workaround which I will post when I get it working.
regards
Andrew
I'm trying to investigate how DBScan behaves as epsilon and min_points are varied, but I noticed warnings when the process attempts to log the min_points parameter of the DBScan operator.
"No such parameter in 'com.rapidminer.parameter.ParameterTypeValue$OperatorValueSelection@1bfa971b"
And sure enough, there are missing values in the log output.
I just upgraded to 5.1 and I'm fairly sure this worked with the last 5.0 version.
I'm working on a macro driven workaround which I will post when I get it working.
regards
Andrew
Tagged:
0
Answers
-
Here it is
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!--
  RapidMiner 5.1 process posted as a reproduction / macro-driven workaround.
  Outline: generate a 2-attribute "three ring clusters" data set, normalise it,
  then run DBSCAN while epsilon is stepped through macros in an outer Loop and
  min_points is varied by an inner grid operator. A Log operator inside the grid
  records the macro values, the DBSCAN parameters and the cluster count.
-->
<process version="5.1.000">
<context>
<input/>
<output/>
<macros/>
</context>
<operator activated="true" class="process" compatibility="5.1.000" expanded="true" name="Process">
<process expanded="true" height="476" width="815">
<!-- Synthetic input: "three ring clusters" target function with 2 attributes. -->
<operator activated="true" class="generate_data" compatibility="5.1.000" expanded="true" height="60" name="Generate Data" width="90" x="45" y="75">
<parameter key="target_function" value="three ring clusters"/>
<parameter key="number_of_attributes" value="2"/>
</operator>
<!-- Range transformation; presumably rescales to the operator's default range (confirm). -->
<operator activated="true" class="normalize" compatibility="5.1.000" expanded="true" height="94" name="Normalize" width="90" x="246" y="75">
<parameter key="method" value="range transformation"/>
</operator>
<!-- Macros that drive the epsilon sweep: start value, step size, a counter and the loop count. -->
<operator activated="true" class="set_macros" compatibility="5.1.000" expanded="true" height="76" name="Set Macros" width="90" x="447" y="75">
<list key="macros">
<parameter key="epsilonStart" value="0.005"/>
<parameter key="epsilonIncrement" value="0.002"/>
<parameter key="loopCounter" value="0"/>
<parameter key="maxLoop" value="10"/>
</list>
</operator>
<!-- Outer loop: the iteration count is taken from the maxLoop macro set above. -->
<operator activated="true" class="loop" compatibility="5.1.000" expanded="true" height="76" name="Loop" width="90" x="648" y="75">
<parameter key="iterations" value="%{maxLoop}"/>
<process expanded="true" height="579" width="984">
<!--
  Recomputes the epsilon macro as epsilonStart + epsilonIncrement * loopCounter and
  then increments loopCounter. Presumably the two entries are evaluated in list
  order, so epsilon uses the counter value from before the increment (confirm).
-->
<operator activated="true" class="generate_macro" compatibility="5.1.000" expanded="true" height="76" name="Generate Macro" width="90" x="112" y="30">
<list key="function_descriptions">
<parameter key="epsilon" value="%{epsilonStart}+%{epsilonIncrement}*%{loopCounter}"/>
<parameter key="loopCounter" value="%{loopCounter}+1"/>
</list>
</operator>
<!-- Exposes the epsilon macro as a loggable value; referenced by the "epsilon" entry of Log (3). -->
<operator activated="true" class="provide_macro_as_log_value" compatibility="5.1.000" expanded="true" height="76" name="Provide Macro as Log Value (2)" width="90" x="246" y="30">
<parameter key="macro_name" value="epsilon"/>
</operator>
<!--
  Inner sweep over Clustering.min_points. The range string is presumably
  min;max;steps;scale, i.e. 2 to 21 in 20 linear steps (confirm against the
  operator documentation).
-->
<operator activated="true" class="optimize_parameters_grid" compatibility="5.1.000" expanded="true" height="94" name="Optimize Parameters (Grid)" width="90" x="380" y="30">
<list key="parameters">
<parameter key="Clustering.min_points" value="[2;21;20;linear]"/>
</list>
<process expanded="true" height="579" width="984">
<!--
  DBSCAN takes epsilon from the macro. The min_points value stored here is
  presumably overwritten by the enclosing grid operator on each step (confirm).
-->
<operator activated="true" class="dbscan" compatibility="5.1.000" expanded="true" height="76" name="Clustering" width="90" x="179" y="165">
<parameter key="epsilon" value="%{epsilon}"/>
<parameter key="min_points" value="21"/>
</operator>
<!-- Turns the cluster model into a performance value; its clusternumber value is logged below. -->
<operator activated="true" class="cluster_count_performance" compatibility="5.1.000" expanded="true" height="76" name="Count" width="90" x="313" y="165"/>
<!-- Exposes the loopCounter macro as a loggable value; referenced by the "loopCounter" entry of Log (3). -->
<operator activated="true" class="provide_macro_as_log_value" compatibility="5.1.000" expanded="true" height="76" name="Provide Macro as Log Value" width="90" x="514" y="165">
<parameter key="macro_name" value="loopCounter"/>
</operator>
<!--
  Log columns. NOTE: the "epsilonMissingFromDBSCAN" entry ends in "parameter." with
  no parameter name after the final dot. The surrounding thread identifies this
  empty reference as the source of the missing log values; after re-selecting the
  entry in the parameter list dialog the author reports the value
  operator.Clustering.parameter.epsilon instead. It is left as posted here because
  it is the reproduction of the reported problem.
-->
<operator activated="true" class="log" compatibility="5.1.000" expanded="true" height="76" name="Log (3)" width="90" x="715" y="165">
<list key="log">
<parameter key="epsilon" value="operator.Provide Macro as Log Value (2).value.macro_value"/>
<parameter key="epsilonMissingFromDBSCAN" value="operator.Clustering.parameter."/>
<parameter key="minPoints" value="operator.Clustering.parameter.min_points"/>
<parameter key="loopCounter" value="operator.Provide Macro as Log Value.value.macro_value"/>
<parameter key="clusterNumber" value="operator.Count.value.clusternumber"/>
</list>
</operator>
<!-- Grid subprocess wiring: input, Clustering, Count, Provide Macro as Log Value, Log (3), performance sink. -->
<connect from_port="input 1" to_op="Clustering" to_port="example set"/>
<connect from_op="Clustering" from_port="cluster model" to_op="Count" to_port="cluster model"/>
<connect from_op="Count" from_port="performance" to_op="Provide Macro as Log Value" to_port="through 1"/>
<connect from_op="Provide Macro as Log Value" from_port="through 1" to_op="Log (3)" to_port="through 1"/>
<connect from_op="Log (3)" from_port="through 1" to_port="performance"/>
<portSpacing port="source_input 1" spacing="0"/>
<portSpacing port="source_input 2" spacing="0"/>
<portSpacing port="sink_performance" spacing="180"/>
<portSpacing port="sink_result 1" spacing="0"/>
</process>
</operator>
<!-- Loop subprocess wiring: input, Generate Macro, Provide Macro as Log Value (2), grid operator, output. -->
<connect from_port="input 1" to_op="Generate Macro" to_port="through 1"/>
<connect from_op="Generate Macro" from_port="through 1" to_op="Provide Macro as Log Value (2)" to_port="through 1"/>
<connect from_op="Provide Macro as Log Value (2)" from_port="through 1" to_op="Optimize Parameters (Grid)" to_port="input 1"/>
<connect from_op="Optimize Parameters (Grid)" from_port="performance" to_port="output 1"/>
<portSpacing port="source_input 1" spacing="0"/>
<portSpacing port="source_input 2" spacing="0"/>
<portSpacing port="sink_output 1" spacing="0"/>
<portSpacing port="sink_output 2" spacing="0"/>
</process>
</operator>
<!-- Top-level wiring: Generate Data, Normalize, Set Macros, Loop. No connection from Loop to a result port is declared here. -->
<connect from_op="Generate Data" from_port="output" to_op="Normalize" to_port="example set input"/>
<connect from_op="Normalize" from_port="example set output" to_op="Set Macros" to_port="through 1"/>
<connect from_op="Set Macros" from_port="through 1" to_op="Loop" to_port="input 1"/>
<portSpacing port="source_input 1" spacing="0"/>
<portSpacing port="sink_result 1" spacing="0"/>
</process>
</operator>
</process>
Andrew0 -
Hi,
I do not see the problem here. Logging works for me. Can you send me the example process?
Greetings,
Sebastian0 -
Hello Sebastian
If you run the process I already provided, it shows the problem.
However, I had a look at the XML and I noticed this...<parameter key="epsilonMissingFromDBSCAN" value="operator.Clustering.parameter."/>
I then edited the entry by explicitly choosing another value from the "edit parameter list" dialog box and then re-choosing the epsilon value and lo and behold...<parameter key="epsilonMissingFromDBSCAN" value="operator.Clustering.parameter.epsilon"/>
So it looks like if the parameter being selected is the first one in the dropdown list, a missing value is put in the XML although it looks like it has been selected.
regards,
Andrew0 -
Hi Andrew,
Now I see the problem: until you have explicitly chosen one of the possibilities, you don't have an entry at all.
I'll try to fix it.
Greetings,
Sebastian0 -
Hi Andrew,
Can you do me a favor and file this as a bug in our bug tracker? It's not that easy to fix, and I won't get it finished before my Christmas holidays.
Greetings,
Sebastian0 -
Done...
merry christmas0