🎉Community Raffle - Win $25

An exclusive raffle opportunity for active members like you! Complete your profile, answer questions and get your first accepted badge to enter the raffle.
Join and Win

"Substantial bug in scoring"

User: "stereotaxon"
New Altair Community Member
Updated by Jocelyn
Hello,

I've come across a pretty big bug. I build a model and then I read in a new dataset and try to score it.
The numeric attribute information stays the same, but the names I use as labels are written incorrectly. Is there
a fix or workaround for this problem?  I'm using WinXP and RapidMiner Community 4.2.0000.

Thanks, Mike



======= debug1.csv (TRAIN) =========

name,var1,var2,var3
Jimi,0.352612363,0.590121045,0.564992742
Janis,0.922569485,0.790112692,0.00504262
Bob,0.766240589,0.908079931,0.734902274
Peter,0.460154945,0.464329674,0.686559339
Paul,0.393046641,0.393054941,0.910596227
Mary,0.322384817,0.403900951,0.176867868
Joni,0.466668921,0.366803665,0.230654245

======= debug2.csv (SCORE) =========
* SAME NAME AS IN TRAINING DATASET
name,var1,var2,var3
Buddy,,0.576962167,0.318579208
Ringo,,0.571770059,0.977731173
Peter,,0.464329674,0.686559339 *
Paul,,0.393054941,0.910596227  *
Mary,,0.403900951,0.176867868  *
Joni,,0.366803665,0.230654245  *
John,,0.565080214,0.509177042
Jimi,,0.590121045,0.564992742  *
Jim,,0.864104026,0.839055131
Janis,,0.790112692,0.00504262  *
Bob,,0.908079931,0.734902274  *

======= SCORING OUTPUT =============
ROW NAME VAR1 PREDICTION(VAR1) VAR2 VAR3
1 Buddy NaN 0.506006992847086 0.577 0.319
2 Ringo NaN 0.5026999432485151 0.572 0.978
3 Paul NaN 0.4312676719193833 0.464 0.687
4 Mary NaN 0.38430756761967644 0.393 0.911
5 Joni NaN 0.39158307673653237 0.404 0.177
6 John NaN 0.3671109097071076 0.367 0.231<< *** ATTRIBUTES ARE OK
7 John NaN 0.4980700738105158 0.565 0.509<< *** BUT THE NAMES ARE WRONG!
8 Buddy NaN 0.5146053218033704 0.59 0.565
9 Jim NaN 0.8006220695941839 0.864 0.839
10 Ringo NaN 0.7430405001131846 0.79 0.0050
11 Peter NaN 0.8348597595558593 0.908 0.735


<!-- Process from the bug report: the "Model" chain trains and stores a model,
     the "Score" chain loads that stored model and applies it to a second data set. -->
<operator name="Root" class="Process" expanded="yes">
    <!-- Training chain. -->
    <operator name="Model" class="OperatorChain" expanded="yes">
        <!-- Active training source: reads c:\debug1.csv with column 1 as id and column 2 as label.
             Per the debug1.csv header shown in the post, these are "name" and "var1". -->
        <operator name="CSVExampleSource" class="CSVExampleSource" breakpoints="after">
            <parameter key="filename" value="c:\debug1.csv"/>
            <parameter key="id_column" value="1"/>
            <parameter key="label_column" value="2"/>
        </operator>
        <!-- Deactivated (activated="no") alternative source reading the same columns from c:\debug1.xls. -->
        <operator name="ExcelExampleSource" class="ExcelExampleSource" breakpoints="after" activated="no">
            <parameter key="excel_file" value="c:\debug1.xls"/>
            <parameter key="first_row_as_names" value="true"/>
            <parameter key="id_column" value="1"/>
            <parameter key="label_column" value="2"/>
        </operator>
        <!-- Learner step; keep_example_set=true so the example set is still available to the ModelApplier below. -->
        <operator name="W-M5P" class="W-M5P">
            <parameter key="keep_example_set" value="true"/>
        </operator>
        <!-- Applies the model to the training data; keep_model=true so the ModelWriter below can still store it. -->
        <operator name="ModelApplier" class="ModelApplier">
            <list key="application_parameters">
            </list>
            <parameter key="keep_model" value="true"/>
        </operator>
        <!-- Stores the model as XML in c:\debug.mod; the "Score" chain loads this same file. -->
        <operator name="ModelWriter" class="ModelWriter">
            <parameter key="model_file" value="c:\debug.mod"/>
            <parameter key="output_type" value="XML"/>
        </operator>
        <!-- The two IOConsumer operators consume the ExampleSet and the Model.
             NOTE(review): presumably so the "Score" chain starts without leftover training objects; confirm. -->
        <operator name="IOConsumer" class="IOConsumer">
            <parameter key="io_object" value="ExampleSet"/>
        </operator>
        <operator name="IOConsumer (2)" class="IOConsumer">
            <parameter key="io_object" value="Model"/>
        </operator>
    </operator>
    <!-- Scoring chain. -->
    <operator name="Score" class="OperatorChain" expanded="yes">
        <!-- Deactivated here: the CSV source for c:\debug2.csv is switched off (activated="no"). -->
        <operator name="CSVExampleSource (2)" class="CSVExampleSource" breakpoints="after" activated="no">
            <parameter key="filename" value="c:\debug2.csv"/>
            <parameter key="id_column" value="1"/>
            <parameter key="label_column" value="2"/>
        </operator>
        <!-- Active scoring source: c:\debug2.xls, again with column 1 as id and column 2 as label.
             NOTE(review): the post shows debug2.csv, but this posted process reads the Excel file; confirm which
             source produced the reported output. -->
        <operator name="ExcelExampleSource (2)" class="ExcelExampleSource">
            <parameter key="excel_file" value="c:\debug2.xls"/>
            <parameter key="first_row_as_names" value="true"/>
            <parameter key="id_column" value="1"/>
            <parameter key="label_column" value="2"/>
        </operator>
        <!-- Loads the model file written by the "Model" chain. -->
        <operator name="ModelLoader" class="ModelLoader">
            <parameter key="model_file" value="c:\debug.mod"/>
        </operator>
        <!-- Applies the loaded model to the scoring data (no application parameters set). -->
        <operator name="ModelApplier (2)" class="ModelApplier">
            <list key="application_parameters">
            </list>
        </operator>
        <!-- Writes the result to c:\debug3.csv, comma separated. -->
        <operator name="CSVExampleSetWriter" class="CSVExampleSetWriter" breakpoints="after">
            <parameter key="column_separator" value=","/>
            <parameter key="csv_file" value="c:\debug3.csv"/>
        </operator>
    </operator>
</operator>

Find more posts tagged with

Sort by:
1 - 2 of 21
    User: "steffen"
    New Altair Community Member
    Hello

    uh, this is really not nice. Another "Nominal Mapping sealed its own doom"-error I suppose...

    here is a workaround:
    I simply create a new id, apply the model, and then restore the old id. Because this relies on "ExampleSetJoin", it is only practical if your example set is not too large...
    <!-- Workaround process: the "Model" chain trains and stores the model; the "Score" chain takes the
         "name" attribute out of the id role before applying the model and makes it the id again afterwards. -->
    <operator name="Root" class="Process" expanded="yes">
        <!-- Training chain; file paths in this process are relative. -->
        <operator name="Model" class="OperatorChain" expanded="yes">
            <!-- Reads debug1.csv with column 1 as id and column 2 as label. -->
            <operator name="CSVExampleSource" class="CSVExampleSource">
                <parameter key="filename" value="debug1.csv"/>
                <parameter key="id_column" value="1"/>
                <parameter key="label_column" value="2"/>
            </operator>
            <!-- Learner step; keep_example_set=true so the example set is still available to the ModelApplier below. -->
            <operator name="W-M5P" class="W-M5P">
                <parameter key="keep_example_set" value="true"/>
            </operator>
            <!-- keep_model=true so the ModelWriter below can still store the model. -->
            <operator name="ModelApplier" class="ModelApplier">
                <list key="application_parameters">
                </list>
                <parameter key="keep_model" value="true"/>
            </operator>
            <!-- Stores the model as XML in debug.mod; the "Score" chain loads this same file. -->
            <operator name="ModelWriter" class="ModelWriter">
                <parameter key="model_file" value="debug.mod"/>
                <parameter key="output_type" value="XML"/>
            </operator>
            <!-- Consume the ExampleSet and the Model before the "Score" chain runs. -->
            <operator name="IOConsumer" class="IOConsumer">
                <parameter key="io_object" value="ExampleSet"/>
            </operator>
            <operator name="IOConsumer (2)" class="IOConsumer">
                <parameter key="io_object" value="Model"/>
            </operator>
        </operator>
        <!-- Scoring chain containing the workaround. -->
        <operator name="Score" class="OperatorChain" expanded="yes">
            <!-- Reads debug2.csv with column 1 as id and column 2 as label. -->
            <operator name="CSVExampleSource (2)" class="CSVExampleSource">
                <parameter key="filename" value="debug2.csv"/>
                <parameter key="id_column" value="1"/>
                <parameter key="label_column" value="2"/>
            </operator>
            <!-- Loads the model file written by the "Model" chain. -->
            <operator name="ModelLoader" class="ModelLoader">
                <parameter key="model_file" value="debug.mod"/>
            </operator>
            <!-- Step 1: change the role of attribute "name" to "ignore", so it is no longer the id. -->
            <operator name="ChangeAttributeRole" class="ChangeAttributeRole">
                <parameter key="name" value="name"/>
                <parameter key="target_role" value="ignore"/>
            </operator>
            <!-- Step 2: IdTagging with default parameters; per the accompanying post this creates the new id. -->
            <operator name="IdTagging" class="IdTagging">
            </operator>
            <!-- Step 3: multiply the ExampleSet, so one copy can keep the names and one can be scored. -->
            <operator name="IOMultiplier" class="IOMultiplier">
                <parameter key="io_object" value="ExampleSet"/>
            </operator>
            <!-- Step 4: on one copy, skip every feature (".*") except those matching "id||name",
                 special features included.
                 NOTE(review): the doubled bar in "id||name" looks like it was meant as the plain
                 alternation "id|name"; confirm how this operator interprets the pattern. -->
            <operator name="reduce_id_saver_set" class="OperatorChain" expanded="yes">
                <operator name="remove_all_except_id_name" class="FeatureNameFilter">
                    <parameter key="except_features_with_name" value="id||name"/>
                    <parameter key="filter_special_features" value="true"/>
                    <parameter key="skip_features_with_name" value=".*"/>
                </operator>
            </operator>
            <!-- Step 5: select ExampleSet number 2.
                 NOTE(review): presumably the untouched copy that still holds var1..var3; confirm. -->
            <operator name="IOSelector" class="IOSelector">
                <parameter key="io_object" value="ExampleSet"/>
                <parameter key="select_which" value="2"/>
            </operator>
            <!-- Step 6: remove the "name" attribute from the selected set before the model is applied. -->
            <operator name="remove_name" class="FeatureNameFilter">
                <parameter key="filter_special_features" value="true"/>
                <parameter key="skip_features_with_name" value="name"/>
            </operator>
            <!-- Step 7: apply the loaded model. -->
            <operator name="ModelApplier (2)" class="ModelApplier">
                <list key="application_parameters">
                </list>
                <parameter key="keep_model" value="true"/>
            </operator>
            <!-- Step 8: join the example sets (default parameters).
                 NOTE(review): presumably matched on the generated id; confirm. -->
            <operator name="ExampleSetJoin" class="ExampleSetJoin">
            </operator>
            <!-- Step 9: give attribute "name" the id role again. -->
            <operator name="restore_old_id" class="ChangeAttributeRole">
                <parameter key="name" value="name"/>
                <parameter key="target_role" value="id"/>
            </operator>
            <!-- Writes the result to debug4.csv, comma separated. -->
            <operator name="CSVExampleSetWriter" class="CSVExampleSetWriter">
                <parameter key="column_separator" value=","/>
                <parameter key="csv_file" value="debug4.csv"/>
            </operator>
        </operator>
    </operator>

    hope this was helpful

    Steffen

    PS: Setting the old id to "inactive" does not work (as expected)
    PPS: Note that there is an attachment function. It is hidden under "additional options" in the post-reply dialog, so you can add XML files and saved .csv files (of moderate size) to your post.
    User: "stereotaxon"
    New Altair Community Member
    OP
    Thanks for the workaround. 
    -Mike