%Load the two word-count spreadsheets as numeric matrices.
MText = readmatrix('Source_datasets/Basel_I_Text_Data.xlsx');
MAlgo = readmatrix('Source_datasets/Basel_I_Algo_Data.xlsx');

%Text data: discard the first spreadsheet column (all column indices used
%further down refer to the matrix after this drop) and record the number
%of rows (one row per word).
MText = MText(:,2:end);
nText = size(MText,1);

%Algo data: same treatment.
MAlgo = MAlgo(:,2:end);
nAlgo = size(MAlgo,1);

%Count matrix, 20 x 9 with:
%1: Unique Operands
%2: Unique Mathematical Operators
%3: Unique Logical Operators
%4: Unique Regulatory Operators
%5: Total Operands
%6: Total Mathematical Operators
%7: Total Logical Operators
%8: Total Regulatory Operators
%9: Total words

%Per-regulation count matrices (one row per regulation, 9 count columns).
MCountText = zeros(20,9);
MCountAlgo = zeros(20,9);

%Column layout of MText/MAlgo as used here:
%  columns 2-5 : per-word weight for each of the four categories, in the
%                order of count-matrix columns 1-4 (operands, mathematical,
%                logical, regulatory operators)
%                NOTE(review): presumably 0/1 flags - confirm in the xlsx
%  column i+5  : number of occurrences of each word in regulation i
categoryCols = 2:5;

%Iterate over 20 regulations (last one being the total)
for i=1:20

    %Occurrences of every word in regulation i (column vectors)
    wordCountsText = MText(:,i+5);
    wordCountsAlgo = MAlgo(:,i+5);

    %4 "unique" measures: occurrences are capped at 1, so a word adds its
    %category weight once however often it appears in the regulation.
    %sum(...,1) always sums over words, also for 0 or 1 rows.
    MCountText(i,1:4) = sum(min(wordCountsText,1).*MText(:,categoryCols),1);
    MCountAlgo(i,1:4) = sum(min(wordCountsAlgo,1).*MAlgo(:,categoryCols),1);

    %4 "total" measures: every occurrence adds the category weight.
    MCountText(i,5:8) = sum(wordCountsText.*MText(:,categoryCols),1);
    MCountAlgo(i,5:8) = sum(wordCountsAlgo.*MAlgo(:,categoryCols),1);

    %Total words: all occurrences regardless of category.
    MCountText(i,9) = sum(wordCountsText,1);
    MCountAlgo(i,9) = sum(wordCountsAlgo,1);

end

%Derived measures: one row per regulation, six columns.
%1: Length               = total words              (count column 9)
%2: Cyclomatic complexity = total logical operators   (count column 7)
%3: Quantity             = total regulatory operators (count column 8)
%4: Potential volume     = 2 + unique operands       (count column 1)
%5: Operator diversity   = sum of the three unique-operator counts
%                          (count columns 2-4)
%6: Level                = potential volume / length

%All 20 regulations are filled column by column.
regRows = 1:20;

MeasuresText = zeros(20,6);
MeasuresText(:,1) = MCountText(regRows,9);
MeasuresText(:,2) = MCountText(regRows,7);
MeasuresText(:,3) = MCountText(regRows,8);
MeasuresText(:,4) = 2 + MCountText(regRows,1);
MeasuresText(:,5) = sum(MCountText(regRows,2:4),2);
MeasuresText(:,6) = MeasuresText(:,4)./MeasuresText(:,1);

MeasuresAlgo = zeros(20,6);
MeasuresAlgo(:,1) = MCountAlgo(regRows,9);
MeasuresAlgo(:,2) = MCountAlgo(regRows,7);
MeasuresAlgo(:,3) = MCountAlgo(regRows,8);
MeasuresAlgo(:,4) = 2 + MCountAlgo(regRows,1);
MeasuresAlgo(:,5) = sum(MCountAlgo(regRows,2:4),2);
MeasuresAlgo(:,6) = MeasuresAlgo(:,4)./MeasuresAlgo(:,1);

%Correlation matrices among five of the measures (Quantity, column 3, is
%left out), computed over regulations 1-19 only; row 20 is skipped.
corrCols  = [1 2 4 5 6];
corrNames = {'Length','Cyclomatic','Potential Volume','Operator Diversity','Level'};
corrSampleText = MeasuresText(1:19,corrCols);
corrSampleAlgo = MeasuresAlgo(1:19,corrCols);

%Pearson correlations (corr default), rounded to 2 decimals.
Correlation_Table_Text = array2table(round(corr(corrSampleText),2),'VariableNames',corrNames);
Correlation_Table_Algo = array2table(round(corr(corrSampleAlgo),2),'VariableNames',corrNames);
table2latex(Correlation_Table_Text,'Output/Correlation_Text_Table_OA5a.tex')
table2latex(Correlation_Table_Algo,'Output/Correlation_Algo_Table_OA2a.tex')

%Spearman rank correlations, rounded to 2 decimals.
Correlation_Table_Text_Spearman = array2table(round(corr(corrSampleText,'Type','Spearman'),2),'VariableNames',corrNames);
Correlation_Table_Algo_Spearman = array2table(round(corr(corrSampleAlgo,'Type','Spearman'),2),'VariableNames',corrNames);
table2latex(Correlation_Table_Text_Spearman,'Output/Correlation_Text_TableOA5b.tex')
table2latex(Correlation_Table_Algo_Spearman,'Output/Correlation_Algo_TableOA2b.tex')

%Correlation between the text-based and the algo-based version of each of
%the 6 measures, computed over regulations 1-19 (row 20 is skipped).
%Column 1: Pearson correlation, column 2: Spearman correlation.
pairedText = MeasuresText(1:19,:);
pairedAlgo = MeasuresAlgo(1:19,:);

Measures_Corr = zeros(6,2);
for i=1:6
    Measures_Corr(i,:) = [corr(pairedText(:,i),pairedAlgo(:,i)), ...
                          corr(pairedText(:,i),pairedAlgo(:,i),'Type','Spearman')];
end

%Keep the rounding and the table conversion as two statements on the same
%variable: without 'VariableNames', array2table names the columns after
%its input variable, so nesting the calls would change the table header.
Measures_Corr_Table = round(Measures_Corr,2);
Measures_Corr_Table = array2table(Measures_Corr_Table);
table2latex(Measures_Corr_Table,'Output/Correlations_Table_2.tex')

%Regulation index 1..20, prepended as the first column of both tables.
T = 1:20;
measureNames = {'Regulation','Length','Cyclomatic','Quantity','Potential Volume','Operator Diversity','Level'};

MeasuresText2 = [T' MeasuresText];
MeasuresAlgo2 = [T' MeasuresAlgo];

%Text-based measures, rounded to 2 decimals, written as a LaTeX table.
Measures_Table_Text = array2table(round(MeasuresText2,2),'VariableNames',measureNames);
table2latex(Measures_Table_Text,'Output/Measures_Text_Table_OA4.tex')

%Algo-based measures, same layout.
Measures_Table_Algo = array2table(round(MeasuresAlgo2,2),'VariableNames',measureNames);
table2latex(Measures_Table_Algo,'Output/Measures_Algo_Table_OA1.tex')