C:\Users\deepankarg183\Downloads\Cost Code Detail Sample Dataset.pdf
## Using tabulizer
# Read the records arriving on Alteryx input connection "#1" as a data frame.
data <- read.Alteryx("#1", mode = "data.frame")
# Field1 supplies the PDF location that is handed to the table extractor.
f <- data$Field1
# Extract the tables found on page 1 of the PDF.
# `pages` is named in full rather than relying on partial argument matching.
out <- tabulizer::extract_tables(file.path(f), pages = 1)
# Convert the extraction result to a data frame.
# NOTE(review): presumably a single table is expected on the page -- confirm
# how several tables of differing size should be combined.
df_txt <- data.frame(out)
# Send the data frame to Alteryx output 1.
write.Alteryx(df_txt, 1)
## PDF text extraction with pdftools
# Pull the incoming records from Alteryx input connection "#1".
pdf_input <- read.Alteryx("#1", mode = "data.frame")
# Field1 carries the location of the PDF to read.
pdf_path <- file.path(pdf_input$Field1)
# pdf_text() gives back a character vector holding the text of each page.
page_text <- pdftools::pdf_text(pdf_path)
# Wrap the page text in a data frame; the column keeps the name `txt`.
page_frame <- data.frame(txt = page_text)
# Emit the data frame on Alteryx output 2.
write.Alteryx(page_frame, 2)
txt
txt
Replace
Warn
txt_Matched
Replace:
\s{2,}
|
RecordID
1
Int32
6
0
[RecordID] = 1
Simple
=
RecordID
True
fixed
2019-06-10 11:52:42
0
1
2019-06-10 11:52:42
2019-06-10 11:52:42
[RecordID] = 1
txt
Last
txt
Horizontal
Reading unstructured data from PDF