diff --git a/Cargo.lock b/Cargo.lock index 9326487..98c4132 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -571,7 +571,7 @@ checksum = "b90ca2580b73ab6a1f724b76ca11ab632df820fd6040c336200d2c1df7b3c82c" [[package]] name = "etradeTaxReturnHelper" -version = "0.4.0" +version = "0.4.1" dependencies = [ "calamine", "chrono", diff --git a/Cargo.toml b/Cargo.toml index 299cffc..85ffaf2 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "etradeTaxReturnHelper" -version = "0.4.0" +version = "0.4.1" edition = "2021" description = "Parses etrade financial documents for transaction details (income, tax paid, cost basis) and compute total income and total tax paid according to chosen tax residency (currency)" license = "BSD-3-Clause" diff --git a/Pictures/GUI.png b/Pictures/GUI.png new file mode 100644 index 0000000..1549a05 Binary files /dev/null and b/Pictures/GUI.png differ diff --git a/README.md b/README.md index 8731da2..b069c50 100644 --- a/README.md +++ b/README.md @@ -1,13 +1,16 @@ # etradeTaxReturnHelper -Project that parse e-trade PDF brokerage statements and Gain and Losses documents and compute total gross gain and tax paid in US that are needed for tax return forms out of US. +Project that parses e-trade PDF account statements and Gain and Losses documents and computes the total gross gain and tax paid in the US that are needed for tax return forms outside the US. ### Data for Tax form from capital gains (PIT-38 in Poland) 1. Install this program: `cargo install etradeTaxReturnHelper` -2. Download PDF documents from a year you are filling your tax return form for example: `Brokerage Statement .pdf`: +2. Download PDF documents for the year you are filing your tax return for, for example: `Brokerage Statement .pdf` and `MS_ClientStatements_.pdf`: 1. Login to e-trade, navigate to [Documents/Brokerage Statements](https://edoc.etrade.com/e/t/onlinedocs/docsearch?doc_type=stmt) 2. Select date period 3. Download all `ACCOUNT STATEMENT` documents -3. 
Run: `etradeTaxReturnHelper ` +3. Run: + 1. `etradeTaxReturnHelper ` + 2. Alternatively, you can run `etradeTaxReturnHelper` to start the program with a GUI (graphical user interface): + ![gui](/Pictures/GUI.png) ### FAQ 1. How to install this project? @@ -17,6 +20,9 @@ Project that parse e-trade PDF brokerage statements and Gain and Losses document `cargo install etradeTaxReturnHelper` 3. For Linux where there is no X server or no priviligies to install system dependencies then you could try to install non-GUI version: `cargo install etradeTaxReturnHelper --no-default-features` +2. Does it work for financial institutions other than etrade? + Savings account statements from Revolut bank (CSV files) are supported, because Revolut does not pay tax on the customer's behalf, so the tax on capital gains from a savings account has to be paid by the customer. + 2. How does it work? Here is a [demo(PL)](https://www.youtube.com/watch?v=Juw3KJ1JdcA) diff --git a/src/lib.rs b/src/lib.rs index 8d2d674..0b993a0 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -265,7 +265,7 @@ pub fn run_taxation( String, > { let mut parsed_div_transactions: Vec<(String, f32, f32)> = vec![]; - let mut parsed_sold_transactions: Vec<(String, String, i32, f32, f32)> = vec![]; + let mut parsed_sold_transactions: Vec<(String, String, f32, f32, f32)> = vec![]; let mut parsed_gain_and_losses: Vec<(String, String, f32, f32, f32)> = vec![]; let mut parsed_revolut_transactions: Vec<(String, Currency)> = vec![]; @@ -274,15 +274,17 @@ pub fn run_taxation( // If name contains .pdf then parse as pdf // if name contains .xlsx then parse as spreadsheet if x.contains(".pdf") { - let (mut div_t, mut sold_t, _) = pdfparser::parse_brokerage_statement(x)?; + let (mut div_t, mut sold_t, _) = pdfparser::parse_statement(x)?; parsed_div_transactions.append(&mut div_t); parsed_sold_transactions.append(&mut sold_t); } else if x.contains(".xlsx") { parsed_gain_and_losses.append(&mut xlsxparser::parse_gains_and_losses(x)?); - } else { + } 
else if x.contains(".csv") { parsed_revolut_transactions.append(&mut csvparser::parse_revolut_transactions(x)?); + } else { + return Err(format!("Error: Unable to open a file: {x}")); } - Ok::<(), &str>(()) + Ok::<(), String>(()) })?; // 2. Verify Transactions verify_dividends_transactions(&parsed_div_transactions)?; diff --git a/src/main.rs b/src/main.rs index ff75f13..440ec44 100644 --- a/src/main.rs +++ b/src/main.rs @@ -11,8 +11,9 @@ mod gui; use etradeTaxReturnHelper::run_taxation; use logging::ResultExt; -// TODO: Finish parse_revolut_transactions -// TODO: Add UT for parsing investment document +// TODO: Make a parsing of incomplete date +// TODO: Dividends of revolut should combined with dividends not sold +// TODO: When I sold on Dec there was EST cost (0.04). Make sure it is included in your results // TODO: async to get currency // TODO: parse_gain_and_losses expect -> ? // TODO: GUI : choosing residency @@ -327,6 +328,42 @@ mod tests { } } + #[test] + #[ignore] + fn test_sold_dividends_taxation_2023() -> Result<(), clap::Error> { + // Get all brokerage with dividends only + let myapp = App::new("E-trade tax helper").setting(AppSettings::ArgRequiredElseHelp); + let rd: Box = Box::new(pl::PL {}); + + let matches = create_cmd_line_pattern(myapp).get_matches_from_safe(vec![ + "mytest", + "etrade_data_2023/Brokerage Statement - XXXXX6557 - 202302.pdf", + "etrade_data_2023/Brokerage Statement - XXXXX6557 - 202303.pdf", + "etrade_data_2023/Brokerage Statement - XXXXX6557 - 202306.pdf", + "etrade_data_2023/Brokerage Statement - XXXXX6557 - 202308.pdf", + "etrade_data_2023/Brokerage Statement - XXXXX6557 - 202309.pdf", + "etrade_data_2023/MS_ClientStatements_6557_202309.pdf", + "etrade_data_2023/MS_ClientStatements_6557_202311.pdf", + "etrade_data_2023/MS_ClientStatements_6557_202312.pdf", + "etrade_data_2023/G&L_Collapsed-2023.xlsx", + ])?; + let pdfnames = matches + .values_of("financial documents") + .expect_and_log("error getting brokarage statements pdfs 
names"); + let pdfnames: Vec = pdfnames.map(|x| x.to_string()).collect(); + + match etradeTaxReturnHelper::run_taxation(&rd, pdfnames) { + Ok((gross_div, tax_div, gross_sold, cost_sold, _, _, _)) => { + assert_eq!( + (gross_div, tax_div, gross_sold, cost_sold), + (8369.726, 1253.2899, 14983.293, 7701.9253) + ); + Ok(()) + } + Err(x) => panic!("Error in taxation process"), + } + } + #[test] #[ignore] fn test_sold_dividends_only_taxation() -> Result<(), clap::Error> { diff --git a/src/pdfparser.rs b/src/pdfparser.rs index acab0d3..c3cca29 100644 --- a/src/pdfparser.rs +++ b/src/pdfparser.rs @@ -1,15 +1,25 @@ use pdf::file::File; +use pdf::object::PageRc; use pdf::primitive::Primitive; pub use crate::logging::ResultExt; +enum StatementType { + BrokerageStatement, + AccountStatement, +} + +#[derive(Clone, Debug, PartialEq)] enum TransactionType { Dividends, Sold, + Tax, Trade, } +#[derive(Debug, PartialEq)] enum ParserState { + SearchingCashFlowBlock, SearchingTransactionEntry, ProcessingTransaction(TransactionType), } @@ -49,8 +59,12 @@ impl Entry for F32Entry { self.val = mystr .trim() .replace(",", "") + .replace("(", "") + .replace(")", "") + .replace("$", "") .parse::() .expect(&format!("Error parsing : {} to f32", mystr)); + log::info!("Parsed f32 value: {}", self.val); } fn getf32(&self) -> Option { Some(self.val) @@ -70,6 +84,7 @@ impl Entry for I32Entry { self.val = mystr .parse::() .expect(&format!("Error parsing : {} to f32", mystr)); + log::info!("Parsed i32 value: {}", self.val); } fn geti32(&self) -> Option { Some(self.val) @@ -89,6 +104,7 @@ impl Entry for DateEntry { if chrono::NaiveDate::parse_from_str(&mystr, "%m/%d/%y").is_ok() { self.val = mystr; + log::info!("Parsed date value: {}", self.val); } } fn getdate(&self) -> Option { @@ -107,6 +123,7 @@ impl Entry for StringEntry { .clone() .into_string() .expect(&format!("Error parsing : {:#?} to f32", pstr)); + log::info!("Parsed String value: {}", self.val); } fn getstring(&self) -> Option { 
Some(self.val.clone()) @@ -125,8 +142,58 @@ fn create_dividend_parsing_sequence(sequence: &mut std::collections::VecDeque>) { + sequence.push_back(Box::new(StringEntry { + val: String::new(), + patterns: vec!["INTEL CORP".to_owned()], + })); + sequence.push_back(Box::new(F32Entry { val: 0.0 })); // Tax Entry +} + +fn create_dividend_fund_parsing_sequence( + sequence: &mut std::collections::VecDeque>, +) { + sequence.push_back(Box::new(StringEntry { + val: String::new(), + patterns: vec!["TREASURY LIQUIDITY FUND".to_owned()], + })); + sequence.push_back(Box::new(StringEntry { + val: String::new(), + patterns: vec!["DIV PAYMENT".to_owned()], + })); + sequence.push_back(Box::new(F32Entry { val: 0.0 })); // Income Entry +} + +fn create_qualified_dividend_parsing_sequence( + sequence: &mut std::collections::VecDeque>, +) { + sequence.push_back(Box::new(StringEntry { + val: String::new(), + patterns: vec!["INTEL CORP".to_owned()], + })); + sequence.push_back(Box::new(F32Entry { val: 0.0 })); // Income Entry +} + fn create_sold_parsing_sequence(sequence: &mut std::collections::VecDeque>) { - sequence.push_back(Box::new(I32Entry { val: 0 })); // Quantity + sequence.push_back(Box::new(F32Entry { val: 0.0 })); // Quantity + sequence.push_back(Box::new(F32Entry { val: 0.0 })); // Price + sequence.push_back(Box::new(F32Entry { val: 0.0 })); // Amount Sold +} + +fn create_sold_2_parsing_sequence(sequence: &mut std::collections::VecDeque>) { + sequence.push_back(Box::new(StringEntry { + val: String::new(), + patterns: vec!["INTEL CORP".to_owned()], + })); + sequence.push_back(Box::new(StringEntry { + val: String::new(), + patterns: vec!["ACTED AS AGENT".to_owned()], + })); + sequence.push_back(Box::new(StringEntry { + val: String::new(), + patterns: vec!["UNSOLICITED TRADE".to_owned()], + })); + sequence.push_back(Box::new(F32Entry { val: 0.0 })); // Quantity sequence.push_back(Box::new(F32Entry { val: 0.0 })); // Price sequence.push_back(Box::new(F32Entry { val: 0.0 })); // 
Amount Sold } @@ -211,11 +278,11 @@ fn create_trade_parsing_sequence(sequence: &mut std::collections::VecDeque>, transaction_dates: &mut Vec, -) -> Option<(String, String, i32, f32, f32)> { +) -> Option<(String, String, f32, f32, f32)> { let quantity = transaction .next() .unwrap() - .geti32() + .getf32() .expect_and_log("Processing of Sold transaction went wrong"); let price = transaction .next() @@ -226,70 +293,203 @@ fn yield_sold_transaction( .next() .unwrap() .getf32() - .expect_and_log("Prasing of Sold transaction went wrong"); + .expect_and_log("Parsing of Sold transaction went wrong"); // Last transaction date is settlement date // next to last is trade date let (trade_date, settlement_date) = match transaction_dates.len() { - 2 => { - let settlement_date = transaction_dates - .pop() - .expect("Error: missing trade date when parsing"); - let trade_date = transaction_dates - .pop() - .expect("Error: missing settlement_date when parsing"); - (trade_date, settlement_date) - } 1 => { log::info!("Detected unsettled sold transaction. Skipping"); return None; } - _ => { + 0 => { log::error!( "Error parsing transaction & settlement dates. Number of parsed dates: {}", transaction_dates.len() ); panic!("Error processing sold transaction. 
Exitting!") } + _ => { + let settlement_date = transaction_dates + .pop() + .expect("Error: missing trade date when parsing"); + let trade_date = transaction_dates + .pop() + .expect("Error: missing settlement_date when parsing"); + (trade_date, settlement_date) + } }; Some((trade_date, settlement_date, quantity, price, amount_sold)) } -/// This function parses given PDF document -/// and returns result of parsing which is a tuple of -/// found Dividends paid transactions (div_transactions), -/// Sold stock transactions (sold_transactions) -/// information on transactions in case of parsing trade document (trades) -/// Dividends paid transaction is: -/// transaction date, gross_us, tax_us, -/// Sold stock transaction is : -/// (trade_date, settlement_date, quantity, price, amount_sold) -pub fn parse_brokerage_statement( - pdftoparse: &str, +/// Recognize whether PDF document is of Brokerage Statement type (old e-trade type of PDF +/// document) or maybe Single account statment (newer e-trade/morgan stanley type of document) +fn recognize_statement(page: PageRc) -> Result { + log::info!("Starting to recognize PDF document type"); + let contents = page + .contents + .as_ref() + .ok_or("Unable to get content of first PDF page")?; + + let mut statement_type = StatementType::BrokerageStatement; + contents.operations.iter().try_for_each(|op| { + match op.operator.as_ref() { + "Tj" => { + // Text show + if op.operands.len() > 0 { + //transaction_date = op.operands[0]; + let a = &op.operands[0]; + log::info!("Detected PDF object: {a}"); + match a { + Primitive::String(actual_string) => { + let raw_string = actual_string.clone().into_string(); + let rust_string = if let Ok(r) = raw_string { + r.trim().to_uppercase() + } else { + "".to_owned() + }; + + if rust_string == "CLIENT STATEMENT" { + statement_type = StatementType::AccountStatement; + log::info!("PDF parser recognized Account Statement document by finding: \"{rust_string}\""); + return Ok(()); + } + }, + + _ => (), 
+ } + } + } + _ => {} + } + Ok::<(),String>(()) + })?; + + Ok(statement_type) +} + +fn process_transaction( + div_transactions: &mut Vec<(String, f32, f32)>, + sold_transactions: &mut Vec<(String, String, f32, f32, f32)>, + actual_string: &pdf::primitive::PdfString, + transaction_dates: &mut Vec, + processed_sequence: &mut Vec>, + sequence: &mut std::collections::VecDeque>, + transaction_type: TransactionType, +) -> Result { + let mut state = ParserState::ProcessingTransaction(transaction_type.clone()); + let possible_obj = sequence.pop_front(); + match possible_obj { + // Move executed parser objects into Vector + // attach only i32 and f32 elements to + // processed queue + Some(mut obj) => { + obj.parse(actual_string); + // attach to sequence the same string parser if pattern is not met + match obj.getstring() { + Some(token) => { + if obj.is_pattern() == false && token != "$" { + sequence.push_front(obj); + } + } + + None => processed_sequence.push(obj), + } + + // If sequence of expected entries is + // empty then extract data from + // processeed elements + if sequence.is_empty() { + state = ParserState::SearchingTransactionEntry; + let mut transaction = processed_sequence.iter(); + match transaction_type { + TransactionType::Tax => { + // Ok we assume here that taxation of transaction appears later in document + // than actual transaction that is a subject to taxation + let tax_us = transaction + .next() + .unwrap() + .getf32() + .ok_or("Processing of Tax transaction went wrong")?; + + // Here we just go through registered transactions and pick the one where + // income is higher than tax and apply tax value + let subject_to_tax = div_transactions + .iter_mut() + .find(|x| x.1 > tax_us) + .ok_or("Error: Unable to find transaction that was taxed")?; + log::info!("Tax: {tax_us} was applied to {subject_to_tax:?}"); + subject_to_tax.2 = tax_us; + log::info!("Completed parsing Tax transaction"); + } + TransactionType::Dividends => { + let gross_us = transaction + 
.next() + .unwrap() + .getf32() + .ok_or("Processing of Dividend transaction went wrong")?; + + div_transactions.push(( + transaction_dates + .pop() + .ok_or("Error: missing transaction dates when parsing")?, + gross_us, + 0.0, // No tax info yet. It will be added later in Tax section + )); + log::info!("Completed parsing Dividend transaction"); + } + TransactionType::Sold => { + if let Some(trans_details) = + yield_sold_transaction(&mut transaction, transaction_dates) + { + sold_transactions.push(trans_details); + } + log::info!("Completed parsing Sold transaction"); + } + TransactionType::Trade => { + return Err("TransactionType::Trade should not appear during account statement processing!".to_string()); + } + } + processed_sequence.clear(); + } else { + state = ParserState::ProcessingTransaction(transaction_type); + } + } + + // In nothing more to be done then just extract + // parsed data from paser objects + None => { + state = ParserState::ProcessingTransaction(transaction_type); + } + } + Ok(state) +} + +/// Parse borkerage statement document type +fn parse_brokerage_statement<'a, I>( + pages_iter: I, ) -> Result< ( Vec<(String, f32, f32)>, - Vec<(String, String, i32, f32, f32)>, + Vec<(String, String, f32, f32, f32)>, Vec<(String, String, i32, f32, f32, f32, f32, f32)>, ), - &str, -> { - //2. parsing each pdf - let mypdffile = File::>::open(pdftoparse) - .expect_and_log(&format!("Error opening and parsing file: {}", pdftoparse)); - + String, +> +where + I: Iterator>, +{ + let mut div_transactions: Vec<(String, f32, f32)> = vec![]; + let mut sold_transactions: Vec<(String, String, f32, f32, f32)> = vec![]; + let mut trades: Vec<(String, String, i32, f32, f32, f32, f32, f32)> = vec![]; let mut state = ParserState::SearchingTransactionEntry; let mut sequence: std::collections::VecDeque> = std::collections::VecDeque::new(); let mut processed_sequence: Vec> = vec![]; // Queue for transaction dates. 
Pop last one or last two as trade and settlement dates let mut transaction_dates: Vec = vec![]; - let mut div_transactions: Vec<(String, f32, f32)> = vec![]; - let mut sold_transactions: Vec<(String, String, i32, f32, f32)> = vec![]; - let mut trades: Vec<(String, String, i32, f32, f32, f32, f32, f32)> = vec![]; - log::info!("Parsing: {} of {} pages", pdftoparse, mypdffile.num_pages()); - for page in mypdffile.pages() { + for page in pages_iter { let page = page.unwrap(); let contents = page.contents.as_ref().unwrap(); for op in contents.operations.iter() { @@ -299,11 +499,15 @@ pub fn parse_brokerage_statement( if op.operands.len() > 0 { //transaction_date = op.operands[0]; let a = &op.operands[0]; + log::trace!("Detected PDF object: {a}"); match a { Primitive::Array(c) => { for e in c { if let Primitive::String(actual_string) = e { match state { + ParserState::SearchingCashFlowBlock => { + log::error!("Brokerage documents do not have cashflow block!") + } ParserState::SearchingTransactionEntry => { let rust_string = actual_string.clone().into_string().unwrap(); @@ -364,6 +568,9 @@ pub fn parse_brokerage_statement( let mut transaction = processed_sequence.iter(); match transaction_type { + TransactionType::Tax => { + return Err("TransactionType::Tax should not appear during brokerage statement processing!".to_string()); + } TransactionType::Dividends => { let tax_us = transaction.next().unwrap().getf32().expect_and_log("Processing of Dividend transaction went wrong"); let gross_us = transaction.next().unwrap().getf32().expect_and_log("Processing of Dividend transaction went wrong"); @@ -440,6 +647,208 @@ pub fn parse_brokerage_statement( Ok((div_transactions, sold_transactions, trades)) } +fn check_if_transaction( + candidate_string: &str, + dates: &mut Vec, + sequence: &mut std::collections::VecDeque>, + year: Option, +) -> Result { + let mut state = ParserState::SearchingTransactionEntry; + + log::info!("Searching for transaction through: 
\"{candidate_string}\""); + + let actual_year = + year.ok_or("Missing year that should be parsed before transactions".to_owned())?; + + if candidate_string == "DIVIDEND" { + create_dividend_fund_parsing_sequence(sequence); + state = ParserState::ProcessingTransaction(TransactionType::Dividends); + log::info!("Starting to parse Dividend Fund transaction"); + } else if candidate_string == "QUALIFIED DIVIDEND" { + create_qualified_dividend_parsing_sequence(sequence); + state = ParserState::ProcessingTransaction(TransactionType::Dividends); + log::info!("Starting to parse Qualified Dividend transaction"); + } else if candidate_string == "SOLD" { + create_sold_2_parsing_sequence(sequence); + state = ParserState::ProcessingTransaction(TransactionType::Sold); + log::info!("Starting to parse Sold transaction"); + } else if candidate_string == "TAX WITHHOLDING" { + create_tax_parsing_sequence(sequence); + state = ParserState::ProcessingTransaction(TransactionType::Tax); + log::info!("Starting to parse Tax transaction"); + } else if candidate_string == "NET CREDITS/(DEBITS)" { + // "NET CREDITS/(DEBITS)" is marking the end of CASH FLOW ACTIVITIES block + state = ParserState::SearchingCashFlowBlock; + log::info!("Finished parsing transactions"); + } else { + let datemonth_pattern = + regex::Regex::new(r"^(0?[1-9]|1[012])/(0?[1-9]|[12][0-9]|3[01])$").unwrap(); + if datemonth_pattern.is_match(candidate_string) { + dates.push(candidate_string.to_owned() + "/" + actual_year.as_str()); + } + } + Ok(state) +} + +/// Get las two digits of year from pattern like: "(AS OF 12/31/23)" +fn yield_year(rust_string: &str) -> Option { + let period_pattern = regex::Regex::new(r"\d{2}\)").unwrap(); + match period_pattern.find(rust_string) { + Some(x) => { + let year_str = x.as_str(); + let last_two_digits = &year_str[..year_str.len() - 1]; + Some(last_two_digits.to_string()) + } + None => None, + } +} + +/// Parse borkerage statement document type +fn parse_account_statement<'a, I>( + 
pages_iter: I, +) -> Result< + ( + Vec<(String, f32, f32)>, + Vec<(String, String, f32, f32, f32)>, + Vec<(String, String, i32, f32, f32, f32, f32, f32)>, + ), + String, +> +where + I: Iterator>, +{ + let mut div_transactions: Vec<(String, f32, f32)> = vec![]; + let mut sold_transactions: Vec<(String, String, f32, f32, f32)> = vec![]; + let trades: Vec<(String, String, i32, f32, f32, f32, f32, f32)> = vec![]; + let mut state = ParserState::SearchingCashFlowBlock; + let mut sequence: std::collections::VecDeque> = + std::collections::VecDeque::new(); + let mut processed_sequence: Vec> = vec![]; + // Queue for transaction dates. Pop last one or last two as trade and settlement dates + let mut transaction_dates: Vec = vec![]; + let mut year: Option = None; + + for page in pages_iter { + let page = page.unwrap(); + let contents = page.contents.as_ref().unwrap(); + for op in contents.operations.iter() { + match op.operator.as_ref() { + "Tj" => { + // Text show + if op.operands.len() > 0 { + //transaction_date = op.operands[0]; + let a = &op.operands[0]; + log::trace!("Parsing account statement: Detected PDF object: {a}"); + match a { + Primitive::String(actual_string) => { + let raw_string = actual_string.clone().into_string(); + let rust_string = if let Ok(r) = raw_string { + r.trim().to_uppercase().replace("$", "") + } else { + "".to_owned() + }; + // Ignore empty tokens + if rust_string != "" { + match state { + ParserState::SearchingCashFlowBlock => { + // Pattern to match "(AS OF )" + let date_pattern = regex::Regex::new(r"\(AS OF (\d{1,2}\/\d{1,2}\/\d{2})\)").map_err(|_| "Unable to create regular expression to capture fiscal year")?; + + // When we find "CASH FLOW ACTIVITY BY DATE" then + // it is a starting point of transactions we are + // interested in + if rust_string == "CASH FLOW ACTIVITY BY DATE" { + state = ParserState::SearchingTransactionEntry; + log::info!("Parsing account statement: \"CASH FLOW ACTIVITY BY DATE\" detected. 
Start to parse transactions"); + } else if date_pattern.is_match(rust_string.as_str()) + && year.is_none() + { + // If we find (AS OF )) + // get year (last two digits out of it) + year = yield_year(&rust_string); + } + } + ParserState::SearchingTransactionEntry => { + state = check_if_transaction( + &rust_string, + &mut transaction_dates, + &mut sequence, + year.clone(), + )?; + } + ParserState::ProcessingTransaction(transaction_type) => { + state = process_transaction( + &mut div_transactions, + &mut sold_transactions, + &actual_string, + &mut transaction_dates, + &mut processed_sequence, + &mut sequence, + transaction_type, + )? + } + } + } + } + _ => (), + } + } + } + _ => {} + } + } + } + + Ok((div_transactions, sold_transactions, trades)) +} +/// This function parses given PDF document +/// and returns result of parsing which is a tuple of +/// found Dividends paid transactions (div_transactions), +/// Sold stock transactions (sold_transactions) +/// information on transactions in case of parsing trade document (trades) +/// Dividends paid transaction is: +/// transaction date, gross_us, tax_us, +/// Sold stock transaction is : +/// (trade_date, settlement_date, quantity, price, amount_sold) +pub fn parse_statement( + pdftoparse: &str, +) -> Result< + ( + Vec<(String, f32, f32)>, + Vec<(String, String, f32, f32, f32)>, + Vec<(String, String, i32, f32, f32, f32, f32, f32)>, + ), + String, +> { + //2. 
parsing each pdf + let mypdffile = File::>::open(pdftoparse) + .map_err(|_| format!("Error opening and parsing file: {}", pdftoparse))?; + + log::info!("Parsing: {} of {} pages", pdftoparse, mypdffile.num_pages()); + + let mut pdffile_iter = mypdffile.pages(); + + let first_page = pdffile_iter + .next() + .unwrap() + .map_err(|_| "Unable to get first page of PDF file".to_string())?; + + let document_type = recognize_statement(first_page)?; + + let (div_transactions, sold_transactions, trades) = match document_type { + StatementType::BrokerageStatement => { + log::info!("Processing brokerage statement PDF"); + parse_brokerage_statement(pdffile_iter)? + } + StatementType::AccountStatement => { + log::info!("Processing Account statement PDF"); + parse_account_statement(pdffile_iter)? + } + }; + + Ok((div_transactions, sold_transactions, trades)) +} + #[cfg(test)] mod tests { use super::*; @@ -468,6 +877,25 @@ mod tests { f.parse(&pdf::primitive::PdfString::new(data)); assert_eq!(f.getf32(), Some(4877.36)); + let data: Vec = vec![ + '(' as u8, '5' as u8, '7' as u8, '.' as u8, '9' as u8, '8' as u8, ')' as u8, + ]; + let mut f = F32Entry { val: 0.0 }; + f.parse(&pdf::primitive::PdfString::new(data)); + assert_eq!(f.getf32(), Some(57.98)); + + let data: Vec = vec!['$' as u8, '1' as u8, '.' as u8, '2' as u8, '2' as u8]; + let mut f = F32Entry { val: 0.0 }; + f.parse(&pdf::primitive::PdfString::new(data)); + assert_eq!(f.getf32(), Some(1.22)); + + let data: Vec = vec![ + '8' as u8, '2' as u8, '.' 
as u8, '0' as u8, '0' as u8, '0' as u8, + ]; + let mut f = F32Entry { val: 0.0 }; + f.parse(&pdf::primitive::PdfString::new(data)); + assert_eq!(f.getf32(), Some(82.00)); + // company code let data: Vec = vec!['D' as u8, 'L' as u8, 'B' as u8]; let mut s = StringEntry { @@ -486,7 +914,27 @@ mod tests { std::collections::VecDeque::new(); create_sold_parsing_sequence(&mut sequence); let mut processed_sequence: Vec> = vec![]; - processed_sequence.push(Box::new(I32Entry { val: 42 })); //quantity + processed_sequence.push(Box::new(F32Entry { val: 42.0 })); //quantity + processed_sequence.push(Box::new(F32Entry { val: 28.8400 })); // Price + processed_sequence.push(Box::new(F32Entry { val: 1210.83 })); // Amount Sold + + yield_sold_transaction(&mut processed_sequence.iter(), &mut transaction_dates) + .ok_or("Parsing error".to_string())?; + Ok(()) + } + + #[test] + fn test_transaction_validation_more_dates() -> Result<(), String> { + let mut transaction_dates: Vec = vec![ + "11/28/22".to_string(), + "11/29/22".to_string(), + "12/01/22".to_string(), + ]; + let mut sequence: std::collections::VecDeque> = + std::collections::VecDeque::new(); + create_sold_parsing_sequence(&mut sequence); + let mut processed_sequence: Vec> = vec![]; + processed_sequence.push(Box::new(F32Entry { val: 42.0 })); //quantity processed_sequence.push(Box::new(F32Entry { val: 28.8400 })); // Price processed_sequence.push(Box::new(F32Entry { val: 1210.83 })); // Amount Sold @@ -502,7 +950,7 @@ mod tests { std::collections::VecDeque::new(); create_sold_parsing_sequence(&mut sequence); let mut processed_sequence: Vec> = vec![]; - processed_sequence.push(Box::new(I32Entry { val: 42 })); //quantity + processed_sequence.push(Box::new(F32Entry { val: 42.0 })); //quantity processed_sequence.push(Box::new(F32Entry { val: 28.8400 })); // Price processed_sequence.push(Box::new(F32Entry { val: 1210.83 })); // Amount Sold @@ -513,11 +961,92 @@ mod tests { Ok(()) } + #[test] + fn test_check_if_transaction() -> 
Result<(), String> { + let rust_string = "DIVIDEND"; + let mut transaction_dates = vec![]; + let mut sequence = std::collections::VecDeque::new(); + + assert_eq!( + check_if_transaction( + &rust_string, + &mut transaction_dates, + &mut sequence, + Some("23".to_owned()) + ), + Ok(ParserState::ProcessingTransaction( + TransactionType::Dividends + )) + ); + + let rust_string = "QUALIFIED DIVIDEND"; + assert_eq!( + check_if_transaction( + &rust_string, + &mut transaction_dates, + &mut sequence, + Some("23".to_owned()) + ), + Ok(ParserState::ProcessingTransaction( + TransactionType::Dividends + )) + ); + + let rust_string = "QUALIFIED DIVIDEND"; + assert_eq!( + check_if_transaction(&rust_string, &mut transaction_dates, &mut sequence, None), + Err("Missing year that should be parsed before transactions".to_owned()) + ); + + let rust_string = "CASH"; + assert_eq!( + check_if_transaction( + &rust_string, + &mut transaction_dates, + &mut sequence, + Some("23".to_owned()) + ), + Ok(ParserState::SearchingTransactionEntry) + ); + + Ok(()) + } + + #[test] + fn test_yield_year() -> Result<(), String> { + let rust_string = "(AS OF 12/31/23)"; + assert_eq!(yield_year(&rust_string), Some("23".to_owned())); + Ok(()) + } + + #[test] + #[ignore] + fn test_account_statement() -> Result<(), String> { + assert_eq!( + parse_statement("data/MS_ClientStatements_6557_202312.pdf"), + (Ok(( + vec![ + ("12/1/23".to_owned(), 1.22, 0.00), + ("12/1/23".to_owned(), 386.50, 57.98), + ], + vec![( + "12/21/23".to_owned(), + "12/26/23".to_owned(), + 82.0, + 46.45, + 3808.86 + )], + vec![] + ))) + ); + Ok(()) + } + #[test] #[ignore] fn test_parse_brokerage_statement() -> Result<(), String> { assert_eq!( - parse_brokerage_statement("data/example-divs.pdf"), + parse_statement("data/example-divs.pdf"), (Ok(( vec![("03/01/22".to_owned(), 698.25, 104.74)], vec![], @@ -525,13 +1054,13 @@ mod tests { ))) ); assert_eq!( - parse_brokerage_statement("data/example-sold-wire.pdf"), + 
parse_statement("data/example-sold-wire.pdf"), Ok(( vec![], vec![( "05/02/22".to_owned(), "05/04/22".to_owned(), - -1, + -1.0, 43.69, 43.67 )], @@ -541,7 +1070,7 @@ mod tests { //TODO(jczaja): Renable reinvest dividends case as soon as you get some PDFs //assert_eq!( - // parse_brokerage_statement("data/example3.pdf"), + // parse_statement("data/example3.pdf"), // ( // vec![ // ("06/01/21".to_owned(), 0.17, 0.03), @@ -553,7 +1082,7 @@ mod tests { //); //assert_eq!( - // parse_brokerage_statement("data/example5.pdf"), + // parse_statement("data/example5.pdf"), // ( // vec![], // vec![], diff --git a/src/transactions.rs b/src/transactions.rs index 72aa882..5a5d031 100644 --- a/src/transactions.rs +++ b/src/transactions.rs @@ -38,7 +38,7 @@ pub fn verify_dividends_transactions( /// we ignore those and use net income rather than principal /// Actual Tax is to be paid from settlement_date pub fn reconstruct_sold_transactions( - sold_transactions: &Vec<(String, String, i32, f32, f32)>, + sold_transactions: &Vec<(String, String, f32, f32, f32)>, gains_and_losses: &Vec<(String, String, f32, f32, f32)>, ) -> Result, String> { // Ok What do I need. 
@@ -442,7 +442,7 @@ mod tests { #[test] fn test_sold_transaction_reconstruction_dividiends_only() -> Result<(), String> { - let parsed_sold_transactions: Vec<(String, String, i32, f32, f32)> = vec![]; + let parsed_sold_transactions: Vec<(String, String, f32, f32, f32)> = vec![]; let parsed_gains_and_losses: Vec<(String, String, f32, f32, f32)> = vec![]; @@ -459,18 +459,18 @@ mod tests { #[test] fn test_sold_transaction_reconstruction_ok() -> Result<(), String> { - let parsed_sold_transactions: Vec<(String, String, i32, f32, f32)> = vec![ + let parsed_sold_transactions: Vec<(String, String, f32, f32, f32)> = vec![ ( "06/01/21".to_string(), "06/03/21".to_string(), - 1, + 1.0, 25.0, 24.8, ), ( "03/01/21".to_string(), "03/03/21".to_string(), - 2, + 2.0, 10.0, 19.8, ), @@ -526,10 +526,10 @@ mod tests { #[test] #[should_panic] fn test_sold_transaction_reconstruction_second_fail() { - let parsed_sold_transactions: Vec<(String, String, i32, f32, f32)> = vec![( + let parsed_sold_transactions: Vec<(String, String, f32, f32, f32)> = vec![( "11/07/22".to_string(), // trade date "11/09/22".to_string(), // settlement date - 173, // quantity + 173.0, // quantity 28.2035, // price 4877.36, // amount sold )]; @@ -563,18 +563,18 @@ mod tests { #[test] fn test_sold_transaction_reconstruction_multistock() -> Result<(), String> { - let parsed_sold_transactions: Vec<(String, String, i32, f32, f32)> = vec![ + let parsed_sold_transactions: Vec<(String, String, f32, f32, f32)> = vec![ ( "12/21/22".to_string(), "12/23/22".to_string(), - 163, + 163.0, 26.5900, 4332.44, ), ( "12/19/22".to_string(), "12/21/22".to_string(), - 252, + 252.0, 26.5900, 6698.00, ), @@ -652,18 +652,18 @@ mod tests { #[test] fn test_sold_transaction_reconstruction_no_gains_fail() { - let parsed_sold_transactions: Vec<(String, String, i32, f32, f32)> = vec![ + let parsed_sold_transactions: Vec<(String, String, f32, f32, f32)> = vec![ ( "06/01/21".to_string(), "06/03/21".to_string(), - 1, + 1.0, 25.0, 24.8, ), ( 
"03/01/21".to_string(), "03/03/21".to_string(), - 2, + 2.0, 10.0, 19.8, ),