// 02_DoddFrank_Act/ 

//Set the working directory before running. Windows users:
//cd %systemdrive%\Users\%username%\Downloads\Codes\02_DoddFrank_Act\
//macOS users:
//cd ~/Downloads/Codes/02_DoddFrank_Act/


// Table 3: Top 10 words in each category
// Reads the per-key frequency file, harmonises the category names, and lists
// the ten keys with the highest total_count within each remaining category.
import delimited "./Source_datasets/DFA-titles_processed/category_cons_all_titles_most_frequent_keys.csv", clear

* Economic operands and attributes are reported together as "Operands".
replace category = "Operands" if inlist(category, "economicoperands", "attributes")
replace category = "LogicalOperators" if category == "logicalconnectors"
replace category = "MathematicalOperators" if category == "mathematicaloperators"
replace category = "RegulatoryOperators" if category == "regulatoryoperators"

* Function words, legal references and uncategorised keys are not tabulated.
drop if inlist(category, "functionwords", "legalreferences", "")

* Rank keys within category from the highest to the lowest total_count.
* The ordering is requested explicitly inside bysort (ascending on the negated
* count), so the rank does not depend on an earlier sort surviving the by-group
* sort, and ties are broken alphabetically by key so reruns give the same
* top 10. Assumes total_count is numeric -- TODO confirm against the CSV.
gen double neg_total_count = -total_count
bysort category (neg_total_count key): gen rank = _n
drop neg_total_count

* Keep only the top 10 in each category
keep if rank <= 10

* Display the result as a table (the data already hold only rank <= 10)
list category key total_count, sepby(category) abbreviate(20)


// Table OA7: Measures for each title
* Stage the per-title category counts as a .dta file so they can be appended.
import delimited using "Source_datasets/DFA-titles_processed/category_cons_count_all_titles.csv", clear
save "Datasets/counts_titles.dta", replace

* Load the unique-count file and stack the per-title counts underneath it.
import delimited using "Source_datasets/DFA-titles_processed/category_unique_count.csv", clear
append using "Datasets/counts_titles.dta"

* Rows without a title take their unique count from unique_count and are
* tagged as title 100 (presumably the rows of category_unique_count.csv,
* which would have no title column -- verify against the CSV).
replace category_unique_count = unique_count if title == .
replace title = 100 if title == .
drop unique_count

* Recode the lower-case category codes to their display names.
* The tokens below are (old code, new name) pairs consumed two at a time.
tokenize attributes Attributes economicoperands EconomicOperands ///
    logicalconnectors LogicalConnectors regulatoryoperators RegulatoryOperators ///
    mathematicaloperators MathematicalOperators functionwords FunctionWords ///
    legalreferences LegalReferences other Other
while "`1'" != "" {
    replace category = "`2'" if category == "`1'"
    macro shift 2
}

* Fill category_count on the title == 100 rows with the total of
* category_count over all rows of the same category. egen's total() treats
* missing values as zero, so rows without a count do not affect the sum.
* Only the eight named categories are filled; any other category value is
* left untouched. The condition uses Stata's documented logical AND (&),
* and the total is stored as double so large counts are not rounded.
foreach cat in Attributes EconomicOperands LogicalConnectors RegulatoryOperators ///
        MathematicalOperators FunctionWords LegalReferences Other {
    egen double cat_total = total(category_count) if category == "`cat'"
    replace category_count = cat_total if category == "`cat'" & title == 100
    drop cat_total
}

* Row-level components of the title-level measures. Each component holds the
* row's count (or unique count) when the row belongs to the relevant category
* and 0 otherwise, so that summing by title yields the per-title totals.

* Every category contributes to the word count.
gen words = category_count

* Operands pool economic operands and attributes.
gen byte is_operand = inlist(category, "EconomicOperands", "Attributes")
gen operands = cond(is_operand, category_count, 0)
gen u_operands = cond(is_operand, category_unique_count, 0)
drop is_operand

gen logical = cond(category == "LogicalConnectors", category_count, 0)
gen u_logical = cond(category == "LogicalConnectors", category_unique_count, 0)

gen regulatory = cond(category == "RegulatoryOperators", category_count, 0)
gen u_regulatory = cond(category == "RegulatoryOperators", category_unique_count, 0)

gen mathematical = cond(category == "MathematicalOperators", category_count, 0)
gen u_mathematical = cond(category == "MathematicalOperators", category_unique_count, 0)

* One observation per title; collapse (sum) ignores missing values.
collapse (sum) words operands u_operands logical u_logical regulatory u_regulatory mathematical u_mathematical, by(title)

* Title-level measures derived from the collapsed counts.
gen length     = words
gen cyclomatic = logical
gen quantity   = regulatory
gen potential  = u_operands + 2
gen diversity  = u_logical + u_regulatory + u_mathematical
gen level      = potential / length

* Normalise the count-based measures by the length of the title.
foreach measure in cyclomatic quantity diversity {
    gen `measure'_n = `measure' / length
}

* Detailed summary statistics across titles.
summarize length cyclomatic_n quantity_n diversity_n level, detail

save "Datasets/Measures.dta", replace

* Note: title 100 is the row reported as "Entire Act" in the table.
// Table OA7
* estpost/esttab come from the community-contributed estout package, which
* must be installed. The data hold one observation per title after the earlier
* collapse, so each tabstat cell is that title's own value. nototal suppresses
* the overall row, and the final `tex' option makes esttab emit LaTeX.
* NOTE(review): the column labels are read from the variable labels of length,
* cyclomatic, quantity, diversity and level (the un-normalised variables, not
* the *_n columns being displayed). No `label variable' statement for them is
* visible in this file, so these lookups may expand to nothing -- confirm the
* intended column headers.
quietly estpost tabstat length cyclomatic_n quantity_n level diversity_n, by(title) nototal
esttab, cells("length(label(`:var lab length') fmt(%12.0fc)) cyclomatic_n(label(`:var lab cyclomatic') fmt(%12.2fc)) quantity_n(label(`:var lab quantity') fmt(%12.2fc)) diversity_n(label(`:var lab diversity') fmt(%12.2fc)) level(label(`:var lab level') fmt(%12.2fc))") ///
noobs nomtitle nonumber varlabels(`e(labels)') varwidth(20)  tex

