admin管理员组

文章数量:1025251

I am building a page that uses PDF.js to load and render a PDF, as shown in the following code.

// Location of the PDF document to load.
var url = '/path-to-pdf.js';
// Location of the PDF.js worker script.
PDFJS.workerSrc = "./js/external/pdf.worker.js";

// Load the document, then render its pages one after another: each call to
// renderPage is handed a completion callback that starts the next page.
PDFJS.getDocument(url).then(function getPdfHelloWorld(pdf) {

    var pageNumber = 1;
    renderPage($(".center-info")[0], pdf, pageNumber, function pageRenderingComplete() {
        // pageNumber is the page that has just finished rendering. Stop once
        // that was the last page; a `>` check here would go on to request
        // page numPages + 1, which does not exist.
        if (pageNumber >= pdf.numPages) {
            return; // All pages rendered
        }
        // Continue rendering of the next page
        // NOTE(review): "display-div" has no "." or "#" prefix, so jQuery
        // matches <display-div> elements -- confirm the intended selector.
        renderPage($("display-div")[0], pdf, ++pageNumber, pageRenderingComplete);
    });

});

I would like to offer a client-side download, which means I need direct access to the raw PDF data. Is it possible to do that here?

I am building a page that uses PDF.js to load and render a PDF, as shown in the following code.

// Location of the PDF document to load.
var url = '/path-to-pdf.js';
// Location of the PDF.js worker script.
PDFJS.workerSrc = "./js/external/pdf.worker.js";

// Load the document, then render its pages one after another: each call to
// renderPage is handed a completion callback that starts the next page.
PDFJS.getDocument(url).then(function getPdfHelloWorld(pdf) {

    var pageNumber = 1;
    renderPage($(".center-info")[0], pdf, pageNumber, function pageRenderingComplete() {
        // pageNumber is the page that has just finished rendering. Stop once
        // that was the last page; a `>` check here would go on to request
        // page numPages + 1, which does not exist.
        if (pageNumber >= pdf.numPages) {
            return; // All pages rendered
        }
        // Continue rendering of the next page
        // NOTE(review): "display-div" has no "." or "#" prefix, so jQuery
        // matches <display-div> elements -- confirm the intended selector.
        renderPage($("display-div")[0], pdf, ++pageNumber, pageRenderingComplete);
    });

});

I would like to offer a client-side download, which means I need direct access to the raw PDF data. Is it possible to do that here?

Share Improve this question edited Aug 4, 2014 at 14:20 ppn029012 asked Aug 3, 2014 at 5:13 ppn029012ppn029012 5702 gold badges6 silver badges21 bronze badges 1
  • Look here for inspiration: github.com/mozilla/pdf.js/blob/… – Rob W Commented Aug 4, 2014 at 15:59
Add a comment  | 

2 Answers 2

Reset to default 6

I just found the answer: we can access the raw data with the getData() method.

PDFJS.getDocument(url).then(function getPdfHelloWorld(pdf) {

    // getData() resolves with the raw bytes of the loaded PDF (a Uint8Array).
    return pdf.getData().then(function(data) {
        // Build the binary string in chunks: passing the whole array to
        // String.fromCharCode.apply at once exceeds the engine's argument
        // limit for large documents and throws a RangeError.
        var CHUNK_SIZE = 0x8000;
        var parts = [];
        for (var offset = 0; offset < data.length; offset += CHUNK_SIZE) {
            parts.push(String.fromCharCode.apply(null, data.subarray(offset, offset + CHUNK_SIZE)));
        }
        var pdfraw = parts.join('');

        // Operate on your raw PDF here...
    });

});

Cheers

/**
 * Collects the text items of every page of a PDF document, in page order.
 *
 * @param {*} input - Passed unchanged to `pdfJS.getDocument`.
 * @returns {Promise<Array>} The `items` of each page's text content,
 *     concatenated from the first page to the last.
 */
async function extract(input) {
    // Current PDF.js releases return a loading task that exposes the document
    // through `.promise`; older releases returned a thenable. Accept both.
    const loadingTask = pdfJS.getDocument(input);
    const pdf = await (loadingTask.promise || loadingTask);

    const elements = [];

    // Pages are 1-indexed and fetched sequentially so the output keeps page order.
    for (let pageNumber = 1; pageNumber <= pdf.numPages; pageNumber++) {
        const page = await pdf.getPage(pageNumber);
        const textContent = await page.getTextContent({
            normalizeWhitespace: true,
            disableCombineTextItems: false,
        });

        for (const item of textContent.items) {
            elements.push(item);
        }
    }

    return elements;
}

I am building a page that uses PDF.js to load and render a PDF, as shown in the following code.

// Location of the PDF document to load.
var url = '/path-to-pdf.js';
// Location of the PDF.js worker script.
PDFJS.workerSrc = "./js/external/pdf.worker.js";

// Load the document, then render its pages one after another: each call to
// renderPage is handed a completion callback that starts the next page.
PDFJS.getDocument(url).then(function getPdfHelloWorld(pdf) {

    var pageNumber = 1;
    renderPage($(".center-info")[0], pdf, pageNumber, function pageRenderingComplete() {
        // pageNumber is the page that has just finished rendering. Stop once
        // that was the last page; a `>` check here would go on to request
        // page numPages + 1, which does not exist.
        if (pageNumber >= pdf.numPages) {
            return; // All pages rendered
        }
        // Continue rendering of the next page
        // NOTE(review): "display-div" has no "." or "#" prefix, so jQuery
        // matches <display-div> elements -- confirm the intended selector.
        renderPage($("display-div")[0], pdf, ++pageNumber, pageRenderingComplete);
    });

});

I would like to offer a client-side download, which means I need direct access to the raw PDF data. Is it possible to do that here?

I am building a page that uses PDF.js to load and render a PDF, as shown in the following code.

// Location of the PDF document to load.
var url = '/path-to-pdf.js';
// Location of the PDF.js worker script.
PDFJS.workerSrc = "./js/external/pdf.worker.js";

// Load the document, then render its pages one after another: each call to
// renderPage is handed a completion callback that starts the next page.
PDFJS.getDocument(url).then(function getPdfHelloWorld(pdf) {

    var pageNumber = 1;
    renderPage($(".center-info")[0], pdf, pageNumber, function pageRenderingComplete() {
        // pageNumber is the page that has just finished rendering. Stop once
        // that was the last page; a `>` check here would go on to request
        // page numPages + 1, which does not exist.
        if (pageNumber >= pdf.numPages) {
            return; // All pages rendered
        }
        // Continue rendering of the next page
        // NOTE(review): "display-div" has no "." or "#" prefix, so jQuery
        // matches <display-div> elements -- confirm the intended selector.
        renderPage($("display-div")[0], pdf, ++pageNumber, pageRenderingComplete);
    });

});

I would like to offer a client-side download, which means I need direct access to the raw PDF data. Is it possible to do that here?

Share Improve this question edited Aug 4, 2014 at 14:20 ppn029012 asked Aug 3, 2014 at 5:13 ppn029012ppn029012 5702 gold badges6 silver badges21 bronze badges 1
  • Look here for inspiration: github.com/mozilla/pdf.js/blob/… – Rob W Commented Aug 4, 2014 at 15:59
Add a comment  | 

2 Answers 2

Reset to default 6

I just found the answer: we can access the raw data with the getData() method.

PDFJS.getDocument(url).then(function getPdfHelloWorld(pdf) {

    // getData() resolves with the raw bytes of the loaded PDF (a Uint8Array).
    return pdf.getData().then(function(data) {
        // Build the binary string in chunks: passing the whole array to
        // String.fromCharCode.apply at once exceeds the engine's argument
        // limit for large documents and throws a RangeError.
        var CHUNK_SIZE = 0x8000;
        var parts = [];
        for (var offset = 0; offset < data.length; offset += CHUNK_SIZE) {
            parts.push(String.fromCharCode.apply(null, data.subarray(offset, offset + CHUNK_SIZE)));
        }
        var pdfraw = parts.join('');

        // Operate on your raw PDF here...
    });

});

Cheers

/**
 * Collects the text items of every page of a PDF document, in page order.
 *
 * @param {*} input - Passed unchanged to `pdfJS.getDocument`.
 * @returns {Promise<Array>} The `items` of each page's text content,
 *     concatenated from the first page to the last.
 */
async function extract(input) {
    // Current PDF.js releases return a loading task that exposes the document
    // through `.promise`; older releases returned a thenable. Accept both.
    const loadingTask = pdfJS.getDocument(input);
    const pdf = await (loadingTask.promise || loadingTask);

    const elements = [];

    // Pages are 1-indexed and fetched sequentially so the output keeps page order.
    for (let pageNumber = 1; pageNumber <= pdf.numPages; pageNumber++) {
        const page = await pdf.getPage(pageNumber);
        const textContent = await page.getTextContent({
            normalizeWhitespace: true,
            disableCombineTextItems: false,
        });

        for (const item of textContent.items) {
            elements.push(item);
        }
    }

    return elements;
}

本文标签: javascript - How to get the raw data from pdfjs - Stack Overflow