[Solved] SVM Support Vector Machine - Performance, Weights and Parameters
Dear all,
after struggling with neural nets (see post http://rapid-i.com/rapidforum/index.php/topic,5356.0.html) I decided to follow the advice to try SVMs.
Therefore, I created a sample process with test data.
My observations:
1) The parameter set output shows a prediction trend accuracy of 0.487, while the log shows 0.579 as the best performance. This is surprising to me, as I thought that the optimizer would provide the best combination.
2) It is written that the SVM operator can handle weights. It might be a stupid question, but where can I connect the weights that come from, e.g., the correlation matrix?
By the way:
- Is there any kind of further documentation which helps to understand the SVM's parameters?
- Neural net offers internal normalization. Is that also true for SVMs or do I need a separate operator for this?
Thank you very much for sharing your ideas...
Sachs
Sample set: http://datahost.bplaced.net/sample3.xls
after struggling with neural nets (see post http://rapid-i.com/rapidforum/index.php/topic,5356.0.html) I decided to follow the advice to try SVMs.
Therefore, I created a sample process with test data.
My observations:
1) The parameter set output shows a prediction trend accuracy of 0.487, while the log shows 0.579 as the best performance. This is surprising to me, as I thought that the optimizer would provide the best combination.
2) It is written that the SVM operator can handle weights. It might be a stupid question, but where can I connect the weights that come from, e.g., the correlation matrix?
By the way:
- Is there any kind of further documentation which helps to understand the SVM's parameters?
- Neural net offers internal normalization. Is that also true for SVMs or do I need a separate operator for this?
Thank you very much for sharing your ideas...
Sachs
Sample set: http://datahost.bplaced.net/sample3.xls
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!--
  RapidMiner process (version 5.2.003).

  Data flow, as wired by the connect elements below:
    Read Excel, then Multiply, which fans out into three copies:
      output 1: Windowing, then Optimize Parameters (Grid); the parameter set goes to result 1
      output 2: Windowing (2), then Apply Model (2), which uses the model returned by the
                optimizer; the labelled data goes to result 2
      output 3: Correlation Matrix (its outputs are not connected to anything)
-->
<process version="5.2.003">
  <context>
    <input/>
    <output/>
    <macros/>
  </context>
  <operator activated="true" class="process" compatibility="5.2.003" expanded="true" name="Process">
    <process expanded="true" height="469" width="701">
      <!-- Imports cells A1:AM74 of the sample workbook. The path is absolute and machine-specific. -->
      <operator activated="true" class="read_excel" compatibility="5.2.003" expanded="true" height="60" name="Read Excel" width="90" x="45" y="120">
        <parameter key="excel_file" value="C:\sample3.xls"/>
        <parameter key="imported_cell_range" value="A1:AM74"/>
        <parameter key="first_row_as_names" value="false"/>
        <!-- Row 0 is annotated as "Name" instead of using first_row_as_names. -->
        <list key="annotations">
          <parameter key="0" value="Name"/>
        </list>
        <parameter key="date_format" value="dd.MM.yyyy"/>
        <!--
          One entry per imported column; the value looks like name.selected.type.role
          (presumably; verify against the Read Excel documentation).
          Column 0 is the id. Column 10 is named "label" but is imported with the role
          "attribute"; the Windowing operators below reference it via label_attribute.
        -->
        <list key="data_set_meta_data_information">
          <parameter key="0" value="id.true.integer.id"/>
          <parameter key="1" value="a.true.real.attribute"/>
          <parameter key="2" value="b.true.real.attribute"/>
          <parameter key="3" value="c.true.real.attribute"/>
          <parameter key="4" value="d.true.real.attribute"/>
          <parameter key="5" value="e.true.real.attribute"/>
          <parameter key="6" value="f.true.real.attribute"/>
          <parameter key="7" value="g.true.real.attribute"/>
          <parameter key="8" value="h.true.real.attribute"/>
          <parameter key="9" value="i.true.real.attribute"/>
          <parameter key="10" value="label.true.real.attribute"/>
          <parameter key="11" value="j.true.real.attribute"/>
          <parameter key="12" value="k.true.real.attribute"/>
          <parameter key="13" value="l.true.real.attribute"/>
          <parameter key="14" value="m.true.real.attribute"/>
          <parameter key="15" value="n.true.real.attribute"/>
          <parameter key="16" value="o.true.real.attribute"/>
          <parameter key="17" value="p.true.real.attribute"/>
          <parameter key="18" value="q.true.real.attribute"/>
          <parameter key="19" value="r.true.real.attribute"/>
          <parameter key="20" value="s.true.real.attribute"/>
          <parameter key="21" value="t.true.real.attribute"/>
          <parameter key="22" value="u.true.real.attribute"/>
          <parameter key="23" value="v.true.real.attribute"/>
          <parameter key="24" value="w.true.real.attribute"/>
          <parameter key="25" value="x.true.real.attribute"/>
          <parameter key="26" value="y.true.real.attribute"/>
          <parameter key="27" value="z.true.real.attribute"/>
          <parameter key="28" value="aa.true.real.attribute"/>
          <parameter key="29" value="ab.true.real.attribute"/>
          <parameter key="30" value="ac.true.real.attribute"/>
          <parameter key="31" value="ad.true.real.attribute"/>
          <parameter key="32" value="ae.true.real.attribute"/>
          <parameter key="33" value="af.true.real.attribute"/>
          <parameter key="34" value="ag.true.real.attribute"/>
          <parameter key="35" value="ah.true.real.attribute"/>
          <parameter key="36" value="ai.true.real.attribute"/>
          <parameter key="37" value="aj.true.real.attribute"/>
          <parameter key="38" value="ak.true.real.attribute"/>
        </list>
      </operator>
      <!-- Copies the imported example set to the three branches described at the top of the file. -->
      <operator activated="true" class="multiply" compatibility="5.2.003" expanded="true" height="112" name="Multiply" width="90" x="179" y="120"/>
      <!-- NOTE(review): no connect element reads from this operator, so its results are unused by the rest of the process. -->
      <operator activated="true" class="correlation_matrix" compatibility="5.2.003" expanded="true" height="94" name="Correlation Matrix" width="90" x="313" y="345"/>
      <!-- Training branch: window size 1, horizon 1, label created from the "label" column. -->
      <operator activated="true" class="series:windowing" compatibility="5.1.002" expanded="true" height="76" name="Windowing" width="90" x="313" y="30">
        <parameter key="horizon" value="1"/>
        <parameter key="window_size" value="1"/>
        <parameter key="create_label" value="true"/>
        <parameter key="label_attribute" value="label"/>
      </operator>
      <!-- Grid search over two parameters of the nested "SVM (Linear)" operator. -->
      <operator activated="true" class="optimize_parameters_grid" compatibility="5.2.003" expanded="true" height="112" name="Optimize Parameters (Grid)" width="90" x="447" y="30">
        <!-- Each value presumably reads [min;max;steps;scale]; confirm against the operator documentation. -->
        <list key="parameters">
          <parameter key="SVM (Linear).C" value="[0.0001;250;5;logarithmic]"/>
          <parameter key="SVM (Linear).convergence_epsilon" value="[0.001;0.1;5;logarithmic]"/>
        </list>
        <process expanded="true" height="446" width="622">
          <!-- Sliding window validation: train on 20 examples, test on 20, advance by 10. -->
          <operator activated="true" class="series:sliding_window_validation" compatibility="5.1.002" expanded="true" height="112" name="Validation" width="90" x="179" y="30">
            <parameter key="training_window_width" value="20"/>
            <parameter key="training_window_step_size" value="10"/>
            <parameter key="test_window_width" value="20"/>
            <!-- Training subprocess: fit the linear SVM on the current training window. -->
            <process expanded="true" height="447" width="232">
              <!-- C and convergence_epsilon are the two parameters listed in the optimizer's grid above. -->
              <operator activated="true" class="support_vector_machine_linear" compatibility="5.2.003" expanded="true" height="76" name="SVM (Linear)" width="90" x="112" y="30">
                <parameter key="C" value="250.0"/>
                <parameter key="convergence_epsilon" value="0.1"/>
              </operator>
              <connect from_port="training" to_op="SVM (Linear)" to_port="training set"/>
              <connect from_op="SVM (Linear)" from_port="model" to_port="model"/>
              <portSpacing port="source_training" spacing="0"/>
              <portSpacing port="sink_model" spacing="0"/>
              <portSpacing port="sink_through 1" spacing="0"/>
            </process>
            <!-- Testing subprocess: apply the model to the test window and measure forecasting performance. -->
            <process expanded="true" height="447" width="299">
              <operator activated="true" class="apply_model" compatibility="5.2.003" expanded="true" height="76" name="Apply Model" width="90" x="45" y="30">
                <list key="application_parameters"/>
              </operator>
              <operator activated="true" class="series:forecasting_performance" compatibility="5.1.002" expanded="true" height="76" name="Performance" width="90" x="179" y="30">
                <parameter key="horizon" value="1"/>
              </operator>
              <connect from_port="model" to_op="Apply Model" to_port="model"/>
              <connect from_port="test set" to_op="Apply Model" to_port="unlabelled data"/>
              <connect from_op="Apply Model" from_port="labelled data" to_op="Performance" to_port="labelled data"/>
              <connect from_op="Performance" from_port="performance" to_port="averagable 1"/>
              <portSpacing port="source_model" spacing="0"/>
              <portSpacing port="source_test set" spacing="0"/>
              <portSpacing port="source_through 1" spacing="0"/>
              <portSpacing port="sink_averagable 1" spacing="0"/>
              <portSpacing port="sink_averagable 2" spacing="0"/>
            </process>
          </operator>
          <!--
            Logs one row per evaluated parameter combination.
            The performance column reads the value of the Validation operator, i.e. the
            performance averaged over all sliding windows, which is what the optimizer
            compares. Reading it from the inner Performance operator instead would log
            only the test window evaluated last, so the best logged value would not
            match the performance reported with the optimal parameter set.
          -->
          <operator activated="true" class="log" compatibility="5.2.003" expanded="true" height="76" name="Log" width="90" x="380" y="120">
            <list key="log">
              <parameter key="c" value="operator.SVM (Linear).parameter.C"/>
              <parameter key="epsilon" value="operator.SVM (Linear).parameter.convergence_epsilon"/>
              <parameter key="prediction_trend_accuracy" value="operator.Validation.value.performance"/>
            </list>
          </operator>
          <connect from_port="input 1" to_op="Validation" to_port="training"/>
          <connect from_op="Validation" from_port="model" to_port="result 1"/>
          <connect from_op="Validation" from_port="averagable 1" to_op="Log" to_port="through 1"/>
          <connect from_op="Log" from_port="through 1" to_port="performance"/>
          <portSpacing port="source_input 1" spacing="0"/>
          <portSpacing port="source_input 2" spacing="0"/>
          <portSpacing port="sink_performance" spacing="0"/>
          <portSpacing port="sink_result 1" spacing="0"/>
          <portSpacing port="sink_result 2" spacing="0"/>
        </process>
      </operator>
      <!--
        Application branch: same windowing settings as "Windowing" except that no
        horizon is set explicitly here.
        NOTE(review): confirm that the operator's default horizon matches the
        horizon of 1 used for training.
      -->
      <operator activated="true" class="series:windowing" compatibility="5.1.002" expanded="true" height="76" name="Windowing (2)" width="90" x="313" y="210">
        <parameter key="window_size" value="1"/>
        <parameter key="create_label" value="true"/>
        <parameter key="label_attribute" value="label"/>
      </operator>
      <!-- Applies the model delivered on the optimizer's "result 1" port to the second windowed copy. -->
      <operator activated="true" class="apply_model" compatibility="5.2.003" expanded="true" height="76" name="Apply Model (2)" width="90" x="581" y="210">
        <list key="application_parameters"/>
      </operator>
      <connect from_op="Read Excel" from_port="output" to_op="Multiply" to_port="input"/>
      <connect from_op="Multiply" from_port="output 1" to_op="Windowing" to_port="example set input"/>
      <connect from_op="Multiply" from_port="output 2" to_op="Windowing (2)" to_port="example set input"/>
      <connect from_op="Multiply" from_port="output 3" to_op="Correlation Matrix" to_port="example set"/>
      <connect from_op="Windowing" from_port="example set output" to_op="Optimize Parameters (Grid)" to_port="input 1"/>
      <connect from_op="Optimize Parameters (Grid)" from_port="parameter" to_port="result 1"/>
      <connect from_op="Optimize Parameters (Grid)" from_port="result 1" to_op="Apply Model (2)" to_port="model"/>
      <connect from_op="Windowing (2)" from_port="example set output" to_op="Apply Model (2)" to_port="unlabelled data"/>
      <connect from_op="Apply Model (2)" from_port="labelled data" to_port="result 2"/>
      <portSpacing port="source_input 1" spacing="0"/>
      <portSpacing port="sink_result 1" spacing="18"/>
      <portSpacing port="sink_result 2" spacing="144"/>
      <portSpacing port="sink_result 3" spacing="0"/>
    </process>
  </operator>
</process>