Hello
I'm trying to cluster data using the SVMCluster model from Samples/Processes/07 but only get 1 cluster from my data. I scaled the data to percentages and want to use a windowing process to examine some customer history then predict future buying.
I changed the model to LibSVMLearner and used the genetic parameter optimizer to try to find acceptable parameters. The output (prediction) is the same value for all examples. When I use your SVMclustering process with LibSVM it does generate different prediction values.
Can you explain what I am doing wrong? Do I need to use the original data instead of percentages?
Here are a modified RM sample that generates different prediction values (for simplicity I used the same data for training and testing, just to get some results), my process that generates the same prediction value for every example, and some sample data. My process also takes a very long time to read the model after training, even though the model file is only 35K. Is there something wrong with model storage that could cause the problem?
(This post may apply:
http://rapid-i.com/rapidforum/index.php?PHPSESSID=4f6c22a9df5bbb0be3e6e41d1eb2c119&topic=490.0 )
Thank you.
Data:
https://www.dropbox.com/s/fs7up70iyr0ugbz/rapidsvm.csv
Description and two process examples:
https://www.dropbox.com/s/nrntxs397uun95m/rapidproblem.txt
(My process exceeds the 20k limit for posts)
Simple example adapted from 07_EvolutionaryParameterOptimization:
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!--
  RapidMiner process (version 5.3.005).

  Data flow at the top level:
    Retrieve -> Multiply
      Multiply output 1 -> ParameterOptimization (evolutionary search over LibSVMLearner.C)
      Multiply output 2 -> Apply Model (scored with the model read back from disk)
    Read Model -> Apply Model
  Results delivered: performance, parameter set, labelled data, model.
-->
<process version="5.3.005">
  <context>
    <input/>
    <output/>
    <macros/>
  </context>
  <operator activated="true" class="process" compatibility="5.3.005" expanded="true"
            name="Root">
    <!--
      The description below is text content, so its continuation lines are kept at column 0
      to avoid adding whitespace to the text.
      NOTE(review): the paragraph markup appears here as real child elements, possibly
      un-escaped by the forum rendering; confirm the process still imports as expected.
    -->
    <description><p>This process is also a parameter optimization process like the first one discussed in the meta group. In this case,
an evolutionary approach is used for the search of the best parameter combination. This approach is often more appropriate and leads to better
results without defining the parameter combinations which should be tested (as for the Grid Search and the quadratic parameter optimization
approaches).</p> <p>The parameters for the evolutionary parameter optimization approach are defined in the same way as for the
other parameter optimization operators. Instead of a comma separated list of parameters which should be checked the user has to define a colon
separated pair which is used as lower and upper bound for the specific parameters.</p> </description>
    <process expanded="true">

      <!-- Load the example set from the sample repository. -->
      <operator activated="true" class="retrieve" compatibility="5.3.005" expanded="true"
                height="60" name="Retrieve" width="90" x="45" y="30">
        <parameter key="repository_entry" value="//Samples/data/Polynomial"/>
      </operator>

      <!-- Duplicate the data: one copy feeds the optimization, the other is scored by Apply Model. -->
      <operator activated="true" class="multiply" compatibility="5.3.005" expanded="true"
                height="94" name="Multiply" width="90" x="179" y="30"/>

      <!-- Evolutionary parameter search; the only parameter varied is LibSVMLearner.C. -->
      <operator activated="true" class="optimize_parameters_evolutionary" compatibility="5.3.005" expanded="true"
                height="94" name="ParameterOptimization" width="90" x="380" y="30">
        <list key="parameters">
          <parameter key="LibSVMLearner.C" value="[0.0;10000]"/>
        </list>
        <parameter key="max_generations" value="10"/>
        <parameter key="tournament_fraction" value="0.75"/>
        <parameter key="crossover_prob" value="1.0"/>
        <process expanded="true">

          <!-- Repeat the inner validation 3 times for each candidate parameter set. -->
          <operator activated="true" class="loop_and_average" compatibility="5.3.005" expanded="true"
                    height="76" name="IteratingPerformanceAverage" width="90" x="45" y="30">
            <parameter key="iterations" value="3"/>
            <process expanded="true">

              <!-- 2-fold cross validation with shuffled sampling. -->
              <operator activated="true" class="x_validation" compatibility="5.3.005" expanded="true"
                        height="112" name="Validation" width="90" x="112" y="30">
                <parameter key="number_of_validations" value="2"/>
                <parameter key="sampling_type" value="shuffled sampling"/>

                <!-- Training subprocess: LibSVM learner configured as epsilon-SVR. -->
                <process expanded="true">
                  <operator activated="true" class="support_vector_machine_libsvm" compatibility="5.3.005" expanded="true"
                            height="76" name="LibSVMLearner" width="90" x="112" y="30">
                    <parameter key="svm_type" value="epsilon-SVR"/>
                    <parameter key="degree" value="40"/>
                    <parameter key="gamma" value="10.0"/>
                    <!-- C is also the parameter listed for optimization in ParameterOptimization. -->
                    <parameter key="C" value="2205.039608219743"/>
                    <parameter key="epsilon" value="1.0"/>
                    <list key="class_weights"/>
                  </operator>
                  <connect from_port="training" to_op="LibSVMLearner" to_port="training set"/>
                  <connect from_op="LibSVMLearner" from_port="model" to_port="model"/>
                  <portSpacing port="source_training" spacing="0"/>
                  <portSpacing port="sink_model" spacing="0"/>
                  <portSpacing port="sink_through 1" spacing="0"/>
                </process>

                <!-- Testing subprocess: apply the fold model, then compute performance. -->
                <process expanded="true">
                  <operator activated="true" class="apply_model" compatibility="5.3.005" expanded="true"
                            height="76" name="Test" width="90" x="45" y="30">
                    <list key="application_parameters"/>
                  </operator>
                  <operator activated="true" class="performance" compatibility="5.3.005" expanded="true"
                            height="76" name="Performance" width="90" x="227" y="30"/>
                  <connect from_port="model" to_op="Test" to_port="model"/>
                  <connect from_port="test set" to_op="Test" to_port="unlabelled data"/>
                  <connect from_op="Test" from_port="labelled data" to_op="Performance" to_port="labelled data"/>
                  <connect from_op="Performance" from_port="performance" to_port="averagable 1"/>
                  <portSpacing port="source_model" spacing="0"/>
                  <portSpacing port="source_test set" spacing="0"/>
                  <portSpacing port="source_through 1" spacing="0"/>
                  <portSpacing port="sink_averagable 1" spacing="0"/>
                  <portSpacing port="sink_averagable 2" spacing="0"/>
                </process>
              </operator>

              <!-- Store the model delivered by Validation; Read Model at the top level uses the same file. -->
              <operator activated="true" class="write_model" compatibility="5.3.005" expanded="true"
                        height="60" name="Write Model" width="90" x="380" y="120">
                <parameter key="model_file" value="c:\rapid\test.mdl"/>
                <parameter key="output_type" value="Binary"/>
              </operator>

              <connect from_port="in 1" to_op="Validation" to_port="training"/>
              <connect from_op="Validation" from_port="model" to_op="Write Model" to_port="input"/>
              <connect from_op="Validation" from_port="averagable 1" to_port="averagable 1"/>
              <portSpacing port="source_in 1" spacing="0"/>
              <portSpacing port="source_in 2" spacing="0"/>
              <portSpacing port="sink_averagable 1" spacing="0"/>
              <portSpacing port="sink_averagable 2" spacing="0"/>
            </process>
          </operator>

          <!-- Log C, degree and the averaged performance to paraopt.log. -->
          <operator activated="true" class="log" compatibility="5.3.005" expanded="true"
                    height="76" name="Log" width="90" x="425" y="30">
            <parameter key="filename" value="paraopt.log"/>
            <list key="log">
              <parameter key="C" value="operator.LibSVMLearner.parameter.C"/>
              <parameter key="degree" value="operator.LibSVMLearner.parameter.degree"/>
              <parameter key="error" value="operator.IteratingPerformanceAverage.value.performance"/>
            </list>
          </operator>

          <connect from_port="input 1" to_op="IteratingPerformanceAverage" to_port="in 1"/>
          <connect from_op="IteratingPerformanceAverage" from_port="averagable 1" to_op="Log" to_port="through 1"/>
          <connect from_op="Log" from_port="through 1" to_port="performance"/>
          <portSpacing port="source_input 1" spacing="0"/>
          <portSpacing port="source_input 2" spacing="0"/>
          <portSpacing port="sink_performance" spacing="0"/>
          <portSpacing port="sink_result 1" spacing="0"/>
        </process>
      </operator>

      <!-- Read the model file that Write Model produces inside the optimization loop. -->
      <operator activated="true" class="read_model" compatibility="5.3.005" expanded="true"
                height="60" name="Read Model" width="90" x="179" y="255">
        <parameter key="model_file" value="c:\rapid\test.mdl"/>
      </operator>

      <!-- Score the second copy of the data with the model read from disk. -->
      <operator activated="true" class="apply_model" compatibility="5.3.005" expanded="true"
                height="76" name="Apply Model" width="90" x="447" y="210">
        <list key="application_parameters"/>
      </operator>

      <!-- Top-level wiring. -->
      <connect from_op="Retrieve" from_port="output" to_op="Multiply" to_port="input"/>
      <connect from_op="Multiply" from_port="output 1" to_op="ParameterOptimization" to_port="input 1"/>
      <connect from_op="Multiply" from_port="output 2" to_op="Apply Model" to_port="unlabelled data"/>
      <connect from_op="ParameterOptimization" from_port="performance" to_port="result 1"/>
      <connect from_op="ParameterOptimization" from_port="parameter" to_port="result 2"/>
      <connect from_op="Read Model" from_port="output" to_op="Apply Model" to_port="model"/>
      <connect from_op="Apply Model" from_port="labelled data" to_port="result 3"/>
      <connect from_op="Apply Model" from_port="model" to_port="result 4"/>
      <portSpacing port="source_input 1" spacing="0"/>
      <portSpacing port="sink_result 1" spacing="0"/>
      <portSpacing port="sink_result 2" spacing="0"/>
      <portSpacing port="sink_result 3" spacing="0"/>
      <portSpacing port="sink_result 4" spacing="0"/>
      <portSpacing port="sink_result 5" spacing="0"/>
    </process>
  </operator>
</process>
[/code]