Execute R Error

Warren_Yabsley
Warren_Yabsley New Altair Community Member
edited November 5 in Altair RapidMiner
Hello Everyone

I am trying to run an R script in RM but have been encountering error messages. I am new to RM and have very limited programming knowledge (script written externally) but have tested the script in R and it is running without errors.

I want to determine a predefined score per job based on four variables that need to be extracted from a very large dataset. As there are many instances per job, the maximum score then needs to be selected. Data are contained in six Excel spreadsheets.

Running the complete process generates the error message: “Execution of the R script failed”. Running part of the script that relates to the first Excel spreadsheet does not produce any errors nor when the script is expanded to incorporate the second spreadsheet. However, when the third spreadsheet is included, the above error message is displayed.

I added a breakpoint before Execute R and the six data spreadsheets have loaded correctly. Their file names match those in the rm_main and within the script.

Below is the xml code.

Any help with this would be much appreciated.

Warren
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<process version="7.0.001">
  <context>
    <input/>
    <output/>
    <macros/>
  </context>
  <operator activated="true" class="process" compatibility="7.0.001" expanded="true" name="Process">
    <process expanded="true">
      <operator activated="true" class="retrieve" compatibility="7.0.001" expanded="true" height="68" name="Retrieve Analytics_Four_Months_SAPBW_Extract_May14" width="90" x="45" y="34">
        <parameter key="repository_entry" value="//Local Repository/data/R 4 Months/Analytics_Four_Months_SAPBW_Extract_May14"/>
      </operator>
      <operator activated="true" class="retrieve" compatibility="7.0.001" expanded="true" height="68" name="Retrieve Distance_Map" width="90" x="45" y="136">
        <parameter key="repository_entry" value="//Local Repository/data/R 4 Months/Distance_Map"/>
      </operator>
      <operator activated="true" class="retrieve" compatibility="7.0.001" expanded="true" height="68" name="Retrieve Four_Months_Orders_In_One_Table_May14" width="90" x="45" y="340">
        <parameter key="repository_entry" value="//Local Repository/data/R 4 Months/Four_Months_Orders_In_One_Table_May14"/>
      </operator>
      <operator activated="true" class="retrieve" compatibility="7.0.001" expanded="true" height="68" name="Retrieve Reading_Map" width="90" x="45" y="187">
        <parameter key="repository_entry" value="//Local Repository/data/R 4 Months/Reading_Map"/>
      </operator>
      <operator activated="true" class="retrieve" compatibility="7.0.001" expanded="true" height="68" name="Retrieve Risk_Score_Map" width="90" x="45" y="238">
        <parameter key="repository_entry" value="//Local Repository/data/R 4 Months/Risk_Score_Map"/>
      </operator>
      <operator activated="true" class="retrieve" compatibility="7.0.001" expanded="true" height="68" name="Retrieve Risk_Score_Calculation_2D_ Matrix" width="90" x="179" y="387">
        <parameter key="repository_entry" value="//Local Repository/data/R 4 Months/Risk_Score_Calculation_2D_ Matrix"/>
      </operator>
      <operator activated="true" class="r_scripting:execute_r" compatibility="7.0.000" expanded="true" height="187" name="Execute R" width="90" x="313" y="34">
        <parameter key="script" value="# rm_main is a mandatory function, &#10;# the number of arguments has to be the number of input ports (can be none)&#10;&#10;rm_main = function(Analytics_Four_Months_SAPBW_Extract_May14.XLSX,Four_Months_Orders_In_One_Table_May14.xlsx,Risk_Score_Calculation_2D_Matrix.xlsx,Reading_Map.xlsx,Risk_Score_Map.xlsx,Distance_Map.xlsx)&#10;&#10;{&#10;&#10;rm(list=ls())&#10;setwd('C:/Enzen/WWU/R Code')&#10;&#10;&#10;library(readxl)&#10;&#10;## process 1&#10;&#10;&#10;a=read_excel('Analytics_Four_Months_SAPBW_Extract_May14.XLSX',col_types = c(rep('text',11),'date',rep('text',3)),sheet = 1)&#10;&#10;a=a[,!names(a)%in%'R']&#10;names(a)=c(&quot;OBJ_ZP2_AUFNR&quot;, &quot;OBJ_ZP2_CLAS&quot;, &quot;OBJ_ZP2_ATNAM&quot;,&quot;OBJ_ZP2_ATZHL&quot;,&quot;OBJ_ZP2_FENUM&quot;,&quot;OBJ_ZP2_POSNR&quot;,&quot;OBJ_ZP2_QMNUM&quot;,&quot;OBJ_ZP2_ATWRT&quot;,&quot;OBJ_ZP2_ATWTB&quot;,&quot;OBJ_ZP2_CFAW&quot;,&quot;OBJ_0ERDAT&quot;,&quot;OBJ_0AEDAT&quot;,&quot;OBJ_ZP2_ERNAM&quot;,&quot;OBJ_ZP2_CHGBY&quot;)&#10;&#10;a[,c(1,4:7,12,14)]=apply(a[,c(1,4:7,12,14)],2,as.numeric)&#10;ZP2_OWCF_temp=a&#10;&#10;&#10;&#10;## process 2&#10;ZP2_OWCF=with(&#10;  ZP2_OWCF_temp,&#10;  OBJ_ZP2_CFAW == 'A' &amp; (&#10;    OBJ_ZP2_CLAS %in% c('EM_LIS_BARHOLE', 'EM_GAS_BARHOLE') |(&#10;      OBJ_ZP2_CLAS == 'EM_LIS_SITE_DTS' &amp; (OBJ_ZP2_ATNAM %in%c('EM_LISHAZARDCAT', 'EM_LISPRESSURE') &amp; OBJ_ZP2_ATWRT != 'ZZZ')&#10;      )&#10;    )&#10;  )&#10;ColNames=c('OBJ_0AEDAT','OBJ_0ERDAT','OBJ_ZP2_ATNAM', 'OBJ_ZP2_ATWRT', 'OBJ_ZP2_ATWTB', 'OBJ_ZP2_ATZHL', 'OBJ_ZP2_AUFNR', 'OBJ_ZP2_CFAW','OBJ_ZP2_CHGBY', 'OBJ_ZP2_CLAS', 'OBJ_ZP2_ERNAM', 'OBJ_ZP2_FENUM', 'OBJ_ZP2_POSNR', 'OBJ_ZP2_QMNUM')&#10;ZP2_OWCF=ZP2_OWCF_temp[ZP2_OWCF,names(ZP2_OWCF_temp)%in%ColNames]&#10;&#10;&#10;&#10;## process 3&#10;PRESSURE = with(&#10;  ZP2_OWCF_temp,&#10;  OBJ_ZP2_CFAW == 'A' &amp; (&#10;    OBJ_ZP2_CLAS == 'EM_LIS_SITE_DTS' &amp; &#10;      (OBJ_ZP2_ATNAM == 'EM_LISPRESSURE' &amp; OBJ_ZP2_ATWRT != 'ZZZ')&#10;    )&#10;  )&#10;ColNames=c('OBJ_0AEDAT', 'OBJ_0ERDAT', 'OBJ_ZP2_ATNAM', 'OBJ_ZP2_ATWRT', 'OBJ_ZP2_ATWTB', 'OBJ_ZP2_ATZHL',  'OBJ_ZP2_AUFNR','OBJ_ZP2_CFAW', 'OBJ_ZP2_CHGBY', 'OBJ_ZP2_CLAS', 'OBJ_ZP2_ERNAM', 'OBJ_ZP2_FENUM', 'OBJ_ZP2_POSNR', 'OBJ_ZP2_QMNUM')&#10;&#10;PRESSURE=ZP2_OWCF_temp[PRESSURE,names(ZP2_OWCF_temp)%in%ColNames]&#10;&#10;## process 4&#10;HAZARDCAT = with(&#10;  ZP2_OWCF_temp,&#10;  OBJ_ZP2_CFAW == 'A' &amp; (&#10;    OBJ_ZP2_CLAS == 'EM_LIS_SITE_DTS' &amp; &#10;      (OBJ_ZP2_CLAS == 'EM_LIS_SITE_DTS' &amp; OBJ_ZP2_ATNAM == 'EM_LISHAZARDCAT')&#10;  )&#10;)&#10;ColNames=c('OBJ_0AEDAT', 'OBJ_0ERDAT', 'OBJ_ZP2_ATNAM', 'OBJ_ZP2_ATWRT', 'OBJ_ZP2_ATWTB', 'OBJ_ZP2_ATZHL','OBJ_ZP2_AUFNR', 'OBJ_ZP2_CFAW', 'OBJ_ZP2_CHGBY', 'OBJ_ZP2_CLAS', 'OBJ_ZP2_ERNAM', 'OBJ_ZP2_FENUM', 'OBJ_ZP2_POSNR', 'OBJ_ZP2_QMNUM')&#10;HAZARDCAT=ZP2_OWCF_temp[HAZARDCAT,names(ZP2_OWCF_temp)%in%ColNames]&#10;&#10;## process 5&#10;masterfields=unique(ZP2_OWCF$OBJ_ZP2_ATNAM)&#10;vfieldnos=length(masterfields)&#10;&#10;fields=unique(ZP2_OWCF[,c('OBJ_ZP2_AUFNR', 'OBJ_ZP2_FENUM')])&#10;&#10;for(i in 1:vfieldnos){&#10;  vfield = masterfields&#10;  tmp=ZP2_OWCF[ZP2_OWCF$OBJ_ZP2_ATNAM%in%vfield,c('OBJ_ZP2_AUFNR','OBJ_ZP2_FENUM','OBJ_ZP2_ATWTB')]&#10;  fields=merge(fields,tmp,by = c('OBJ_ZP2_AUFNR', 'OBJ_ZP2_FENUM'),all.x=T)&#10;  names(fields)[i+2]=vfield&#10;}&#10;&#10;&#10;&#10;## process 6&#10;# ,'OBJ_ZP2_FENUM'&#10;PRESSURE_VAL=PRESSURE[,c('OBJ_ZP2_AUFNR','OBJ_ZP2_ATWTB')]&#10;names(PRESSURE_VAL)[2]='PRESSURE_VAL'&#10;&#10;HAZARDCAT_VAL=HAZARDCAT[,c('OBJ_ZP2_AUFNR','OBJ_ZP2_ATWTB')]&#10;names(HAZARDCAT_VAL)[2]='HAZARDCAT_VAL'&#10;&#10;fields=merge(fields,PRESSURE_VAL,by=c('OBJ_ZP2_AUFNR'),all.x = T,all.y = F)&#10;&#10;fields=merge(fields,HAZARDCAT_VAL,by=c('OBJ_ZP2_AUFNR'),all.x = T)&#10;&#10;## process 7&#10;b=read_excel('Four_Months_Orders_In_One_Table_May14.xlsx',col_types = c('numeric','text','date','numeric',rep('text',2)),sheet = 3)&#10;Hour12Table=b[,c('OBJ_JOBS_SUPORDER', &#10;                'OBJ_JOBS_SUPUSERSTATUS', &#10;                'OBJ_JOBS_CREATEDON', &#10;                'GAS_STOP_DURATION'&#10;)]&#10;&#10;names(Hour12Table)[1]='OBJ_ZP2_AUFNR'&#10;&#10;&#10;bb=unlist(sapply(Hour12Table$OBJ_JOBS_SUPUSERSTATUS ,function(i){&#10;  length(c(grep('LIM',i),grep('LIP',i)))&gt;0&#10;}))&#10;&#10;Hour12Table=Hour12Table[bb,]&#10;&#10;Hour12Table=merge(fields,Hour12Table,by = 'OBJ_ZP2_AUFNR',all.y = T)&#10;&#10;&#10;idx12=Hour12Table$OBJ_ZP2_AUFNR%in%unique(fields$OBJ_ZP2_AUFNR)&#10;Hour12Table=Hour12Table[idx12,]&#10;&#10;&#10;## process 8&#10;c=read_excel('Risk_Score_Calculation_2D_Matrix.xlsx',col_types = c('numeric',rep('text',4),rep('numeric',3)),sheet = 1)&#10;RiskScore=c[1:360,]&#10;&#10;&#10;RiskScore1=RiskScore[,names(RiskScore)%in%c('Hazard Category','Pressure','Distance From Property','Lower Threshold Vented Gas Reading','Upper Threshold Vented Gas Reading','Risk Score Value')]&#10;IterNo= RiskScore[,'Upper Threshold Vented Gas Reading'] - RiskScore[,'Lower Threshold Vented Gas Reading'] + 1 &#10;RiskScore1$GasReading=RiskScore1[,'Lower Threshold Vented Gas Reading']+IterNo-1&#10;&#10;RiskScore1=do.call(rbind,by(RiskScore1,INDICES = RiskScore1$'Risk Score Value',function(i){&#10;  tmp=i&#10;  out0=lapply(1:nrow(tmp),function(j){&#10;    tmp1=tmp[j,]&#10;    tmp1_u=tmp1$'Upper Threshold Vented Gas Reading'&#10;    tmp1_l=tmp1$'Lower Threshold Vented Gas Reading'&#10;    &#10;    IterNo=tmp1_u-tmp1_l+1&#10;    out1=matrix(rep(tmp1,IterNo),nrow=IterNo, byrow = T)&#10;    out1=cbind(out1,tmp1_l-1+1:IterNo)&#10;    out1&#10;  })&#10;  out=do.call(rbind,out0)&#10;  out&#10;  &#10;}))&#10;RiskScore1=t(apply(RiskScore1,1,as.character))&#10;RiskScore1=data.frame(RiskScore1)&#10;names(RiskScore1)=c('Hazard Category','Pressure','Distance From Property','Lower Threshold Vented Gas Reading','Upper Threshold Vented Gas Reading','Risk Score Value','IterNo','GasReading')&#10;RiskScore1$&quot;Lower Threshold Vented Gas Reading&quot;=RiskScore1$IterNo=NULL&#10;&#10;## process 9&#10;Map_Pressure=data.frame('Category'=c('LP','IP','MP','HP','VHP'),&#10;                        'Abbreviation'=c('Low Pressure','Intermediate Pressure','Medium Pressure','High Pressure','Very High Pressure'))&#10;&#10;&#10;## process 10&#10;Map_HazardCat=data.frame('Category'=c('C','A','B'),&#10;                        'Abbreviation'=c('Over 1.75m Of Open Ground','No Open Ground','Less Than 1.75m Of Open Ground'))&#10;&#10;&#10;## process 11&#10;RiskScore2=merge(RiskScore1,Map_HazardCat,by.x = 'Hazard Category', by.y='Category')&#10;names(RiskScore2)[which(names(RiskScore2)=='Abbreviation')]='Hazardcat1'&#10;&#10;&#10;RiskScore2=merge(RiskScore2,Map_Pressure,by.x = 'Pressure', by.y='Category')&#10;names(RiskScore2)[which(names(RiskScore2)=='Abbreviation')]='Pressure1'&#10;&#10;RiskScore2$'Hazard Category'=RiskScore2$'Pressure'=NULL&#10;&#10;RiskScore2[,c('Upper Threshold Vented Gas Reading', 'Risk Score Value', 'GasReading')]=t(apply(RiskScore2[,c('Upper Threshold Vented Gas Reading', 'Risk Score Value', 'GasReading')],2,as.numeric))&#10;&#10;&#10;###################&#10;#### red words ####&#10;###################&#10;&#10;# load info from 'Distance_Map.xlsx'&#10;d=read_excel('Distance_Map.xlsx',sheet = 'DistanceMap')&#10;DistanceMap=unique(d)&#10;&#10;vfieldnosD = length(DistanceMap$Distance)&#10;&#10;e=read_excel('Reading_Map.xlsx',sheet = 'ReadingMap')&#10;ReadingsMap=unique(e)&#10;&#10;vfieldnosR = length(ReadingMap$Readings)&#10;&#10;f=read_excel('Risk_Score_Map.xlsx',sheet = 'RiskScMap')&#10;RiskScoreMap=unique(f)&#10;&#10;vfieldnosRV = length(RiskScoreMap$RiskScore)&#10;&#10;# create new fields&#10;fields=Hour12Table&#10;&#10;&#10;&#10;for (i in #1:2&#10;    1:min(vfieldnosD,&#10;                max(sapply(names(fields),function(i) suppressWarnings(as.numeric(substr(i,nchar(i),nchar(i))))),na.rm = T)&#10;                )&#10;    ){&#10;  vfieldD = DistanceMap$Distance&#10;  vfieldR = ReadingsMap$Readings&#10;  vfieldRV = RiskScoreMap$RiskScore&#10;  &#10;  tmp=RiskScore2[,c('Hazardcat1','Pressure1','Distance From Property','GasReading','Risk Score Value')]&#10;  names(tmp)=c('HAZARDCAT_VAL','PRESSURE_VAL',vfieldD,vfieldR,vfieldRV)&#10;  &#10;  tmp_fields_paste=mapply('paste',fields[vfieldD],fields[vfieldR])&#10;  tmp_paste=mapply('paste',tmp[vfieldD],tmp[vfieldR])&#10;  &#10;  tmp_fields_paste=mapply('paste',tmp_fields_paste,fields$PRESSURE_VAL)&#10;  tmp_paste=mapply('paste',tmp_paste,tmp$PRESSURE_VAL)&#10;  &#10;  tmp_fields_paste=mapply('paste',tmp_fields_paste,fields$HAZARDCAT_VAL)&#10;  tmp_paste=mapply('paste',tmp_paste,tmp$HAZARDCAT_VAL)&#10;  &#10;  idx=tmp_paste%in%tmp_fields_paste&#10;  tmp0=tmp[idx,]&#10;  &#10;  fields=merge(fields,tmp0,by = c('HAZARDCAT_VAL','PRESSURE_VAL',vfieldD,vfieldR),all.x = T)&#10;}&#10;&#10;&#10;# end new fields&#10;&#10;#&#10;&#10;fieldsRowNum=fields&#10;ColIdx=substr(names(fieldsRowNum),1,9)=='RiskScore'&#10;MaxFieldsRowNum=apply(fieldsRowNum,1,function(i){&#10;  tmp=suppressWarnings(as.numeric(i[ColIdx]))&#10;  if(sum(is.na(tmp))==length(tmp))&#10;    out=NA&#10;  if(sum(is.na(tmp))!=length(tmp))&#10;    out=max(tmp,na.rm = T)&#10;  out&#10;})&#10;fieldsRowNum$MaxRiskScore=MaxFieldsRowNum&#10;&#10;FinalTable1=do.call(rbind,by(fieldsRowNum,fieldsRowNum$OBJ_ZP2_AUFNR,function(i){&#10;  tmp=i&#10;  if(ncol(tmp)==1)&#10;    tmp=t(tmp)&#10;  MaxRisk=tmp[,'MaxRiskScore']&#10;  if(sum(is.na(MaxRisk))==nrow(tmp))&#10;    out=NULL&#10;  if(sum(is.na(MaxRisk))!=nrow(tmp))&#10;    out=tmp[which.max(as.numeric(MaxRisk)),]&#10;  out&#10;}))&#10;&#10;FinalTable1$MaxMaxRiskScore=FinalTable1$MaxRiskScore&#10;FinalTable1$MaxRiskScore=NULL&#10;&#10;fieldsRowNum=merge(fieldsRowNum,FinalTable1,by = intersect(names(fieldsRowNum),names(FinalTable1)),all.x = T)&#10;&#10;}"/>
      </operator>
      <connect from_op="Retrieve Analytics_Four_Months_SAPBW_Extract_May14" from_port="output" to_op="Execute R" to_port="input 1"/>
      <connect from_op="Retrieve Distance_Map" from_port="output" to_op="Execute R" to_port="input 4"/>
      <connect from_op="Retrieve Four_Months_Orders_In_One_Table_May14" from_port="output" to_op="Execute R" to_port="input 2"/>
      <connect from_op="Retrieve Reading_Map" from_port="output" to_op="Execute R" to_port="input 5"/>
      <connect from_op="Retrieve Risk_Score_Map" from_port="output" to_op="Execute R" to_port="input 6"/>
      <connect from_op="Retrieve Risk_Score_Calculation_2D_ Matrix" from_port="output" to_op="Execute R" to_port="input 3"/>
      <connect from_op="Execute R" from_port="output 1" to_port="result 1"/>
      <portSpacing port="source_input 1" spacing="0"/>
      <portSpacing port="sink_result 1" spacing="0"/>
      <portSpacing port="sink_result 2" spacing="0"/>
    </process>
  </operator>
</process>
Tagged: