1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
|
/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
/*
* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/.
*/
#include "root.hpp"
#include "document.hpp"
#include "global.hpp"
#include "orcus/format_detection.hpp"
#include "orcus/info.hpp"
#include <iostream>
#include <sstream>
#include <object.h>
using namespace std;
namespace orcus { namespace python {

/**
 * Python-callable function that runs orcus format detection on a stream
 * and returns the corresponding "FormatType" enum value.
 *
 * @param args positional arguments, forwarded to read_stream_from_args().
 * @param kwargs keyword arguments, forwarded to read_stream_from_args().
 *
 * @return the object produced by get_python_enum_value() for the
 *         "FormatType" enum and the member name matching the detected
 *         format ("UNKNOWN" when the format is unknown or not one of the
 *         cases handled here), or nullptr on failure.  When detection
 *         throws a std::exception, a Python ValueError is set before
 *         returning nullptr.
 */
PyObject* detect_format(PyObject* /*module*/, PyObject* args, PyObject* kwargs)
{
    py_unique_ptr stream = read_stream_from_args(args, kwargs);
    if (!stream)
        // No error is set here; presumably read_stream_from_args() has
        // already set one - TODO confirm.
        return nullptr;

    // Get a view of the raw bytes held by the stream object.
    char* buf = nullptr;
    Py_ssize_t buf_size = 0;
    if (PyBytes_AsStringAndSize(stream.get(), &buf, &buf_size) < 0)
        return nullptr;

    try
    {
        // Pick the enum member name first, then do a single lookup.
        // "UNKNOWN" stays in place for format_t::unknown and for any
        // value without an explicit case below.
        const char* member = "UNKNOWN";

        switch (orcus::detect({buf, static_cast<size_t>(buf_size)}))
        {
            case format_t::ods:
                member = "ODS";
                break;
            case format_t::xlsx:
                member = "XLSX";
                break;
            case format_t::gnumeric:
                member = "GNUMERIC";
                break;
            case format_t::xls_xml:
                member = "XLS_XML";
                break;
            case format_t::parquet:
                member = "PARQUET";
                break;
            case format_t::csv:
                member = "CSV";
                break;
            case format_t::unknown:
            default:
                break;
        }

        // Kept inside the try block so that a std::exception raised by
        // the lookup is reported the same way as a detection failure.
        return get_python_enum_value("FormatType", member);
    }
    catch (const std::exception&)
    {
        PyErr_SetString(PyExc_ValueError, "failed to perform deep detection on this file.");
        return nullptr;
    }
}

}}
/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
|