-
Notifications
You must be signed in to change notification settings - Fork 10
/
Copy pathindex.js
92 lines (80 loc) · 2.49 KB
/
index.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
// Third-party and core modules. None of these bindings is ever reassigned,
// so they are all declared with const.
const express = require("express");
const multer = require("multer");
const path = require("path");
const fs = require("fs");
const PDFParser = require("pdf2json");
const shell = require("shelljs");

// Express application, the port it listens on (PORT env var, else 3003),
// and the router that carries the /check route.
const app = express();
const port = process.env.PORT || 3003;
const router = express.Router();
// Multer settings.
// Uploaded files are written to ./public/myuploads (relative to the process
// working directory). The destination is passed as a string rather than a
// callback so that multer creates the directory when it is missing; with a
// callback the directory has to exist already or the upload fails.
const storage = multer.diskStorage({
  destination: "./public/myuploads",
  // Every upload is stored as "<fieldname>.pdf". With the single "pdffile"
  // field configured below that is always "pdffile.pdf", so a new upload
  // overwrites the previous one.
  filename(req, file, cb) {
    cb(null, `${file.fieldname}.pdf`);
  }
});

// Middleware that accepts exactly one file from the "pdffile" form field.
const upload = multer({ storage }).single("pdffile");
// Static folders, registered in this order: the working directory first,
// then ./public.
// NOTE(review): serving "." exposes every file under the working directory,
// this source file included — confirm that is intended.
for (const staticDir of [".", "./public"]) {
  app.use(express.static(staticDir));
}

// Routes
// @type   - GET /
// @desc   - sends index.html (located next to this file) as the home page
// @access - PUBLIC
app.get("/", (req, res) => {
  const homePage = __dirname + "/index.html";
  res.sendFile(homePage);
});
// @type   - POST /upload
// @desc   - stores the uploaded PDF, writes its raw text to ./file.txt and,
//           once that file exists, redirects the client to /check
// @access - PUBLIC
app.post("/upload", (req, res) => {
  upload(req, res, error => {
    if (error) {
      return res.end("Error uploading file.");
    }
    // multer reports no error when the form carries no "pdffile" part. Without
    // this guard the parser would silently re-read the PDF left on disk by an
    // earlier upload.
    if (!req.file) {
      return res.status(400).end("Error uploading file.");
    }

    console.log(`1. Parsing the pdf.`);
    // parsing the pdf ===================
    // The second constructor argument (1) asks pdf2json to keep the raw text
    // so that getRawTextContent() has something to return.
    const pdfParser = new PDFParser(this, 1);

    pdfParser.on("pdfParser_dataError", errData => {
      console.error(errData);
      // Answer the request instead of leaving the client waiting forever.
      if (!res.headersSent) {
        res.status(500).end("Error parsing file.");
      }
    });

    pdfParser.on("pdfParser_dataReady", () => {
      try {
        fs.writeFileSync("./file.txt", pdfParser.getRawTextContent()); //change to ./original.txt here
      } catch (writeError) {
        console.error(writeError);
        if (!res.headersSent) {
          res.status(500).end("Error parsing file.");
        }
        return;
      }
      console.log("2. Parsing done. Applying NLP to it");
      // spawning NLP script on the PDF text=============
      // Redirect only now: loadPDF() is asynchronous, so redirecting earlier
      // lets /check run before file.txt has been written. res.redirect() also
      // ends the response, hence no res.end() afterwards.
      res.redirect("/check");
    });

    console.log(__dirname + "/file.txt");
    pdfParser.loadPDF(
      path.resolve(__dirname + "/public/myuploads/pdffile.pdf")
    );
  });
});
// @type   - GET /check
// @desc   - runs ./checker.sh and reports whether it succeeded
// @access - PUBLIC
app.use("/", router);
router.get("/check", (req, res) => {
  console.log("In check router.3. Using API to query the NLP string");
  // running shellscript for API
  // Without a callback shell.exec() is synchronous: it returns only after the
  // script has exited, and the returned object carries the exit code.
  const result = shell.exec("./checker.sh");
  if (result.code !== 0) {
    // Report the failure instead of answering as if the check had succeeded.
    console.error(`checker.sh exited with code ${result.code}`);
    return res.status(500).end("Error running checker.");
  }
  res.end("ended");
});
// Expose the router to any module that requires this file.
module.exports = router;

// Start the HTTP server and log the port once it is listening.
app.listen(port, function onListening() {
  console.log(`server is running fine at ${port}...`);
});
/* TODO:
1. Upload a PDF and convert it into text.
2. Run NLP on the text.
3. Query the API with the NLP-processed text.
4. Take the URLs received and sort the JSON in descending order.
5. Use string matching on the top 5 URLs.
6. Log the result as JSON.
7. Try to print it on the web page too, e.g.:
URL  | Percentage matching | Matched words |
-----|---------------------|---------------|
URL1 | 95%                 | sat,sad,hello |*/