void-packages/srcpkgs/catdoc/patches/001-XLS_parsing_improvement...

124 lines
3.5 KiB
Diff
Raw Normal View History

2018-10-05 00:44:02 +02:00
Description: Improve the XLS parsing:
* Don't stop processing after an EOF which is not followed by a BOF, as there
are many records that can appear after it (like a graph).
* On an unexpected BOF record, dump the already extracted data before
complaining and freeing memory.
* Accept different versions of BOF and XF records.
* Add more #defines for record types.
--- src/xlsparse.c
+++ src/xlsparse.c
@@ -107,12 +107,13 @@
itemsread = catdoc_read(rec, 1, reclen, input);
rec[reclen] = '\0';
}
+ /*
+ fprintf(stderr,"Rectype 0x%04X reclen=%d\n",rectype, reclen);
if(eof_flag) {
- if (rectype != BOF) {
+ if (rectype != BOF8) {
break;
}
- }
-/* fprintf(stderr,"Rectype 0x%04X reclen=%d\n",rectype, reclen); */
+ }*/
process_item(rectype,reclen,rec);
if (rectype == MSEOF) {
eof_flag=1;
@@ -150,7 +151,7 @@
case WRITEPROT:
/* File is write protected, but we only read it */
break;
- case 0x42: {
+ case CODEPAGE: {
if (source_charset) break;
codepage=getshort(rec,0);
/*fprintf(stderr,"CODEPAGE %d\n",codepage); */
@@ -274,9 +275,10 @@
}
break;
}
- case 0x03:
- case 0x103:
- case 0x303:
+ /* These 3 record types don't seem to make any sense here; they fall through to the NUMBER case. */
+ case INVALID_03:
+ case SXFORMULA:
+ case INVALID_303:
case NUMBER: {
int row,col;
unsigned char **pcell;
@@ -363,22 +365,31 @@
*saved_reference=copy_unicode_string(&src);
break;
}
- case BOF: {
+ case BOF2:
+ case BOF3:
+ case BOF4:
+ case BOF8: {
if (rowptr) {
fprintf(stderr,"BOF when current sheet is not flushed\n");
+ print_sheet();
free_sheet();
}
break;
}
- case XF:
- case 0x43: /*from perl module Spreadsheet::ParseExecel */
+ case XF_4P:
+ case XF_4:
+ case XF: /* from perl module Spreadsheet::ParseExcel */
{
- short int formatIndex = getshort(rec,2);
+ short int formatIndex;
+ if (biff_version == 4)
+ formatIndex = (short int)rec[1];
+ else
+ formatIndex = getshort(rec, 2);
/* we are interested only in format index here */
if (formatTableIndex >= formatTableSize) {
formatTable=realloc(formatTable,
- (formatTableSize+=16)*sizeof(short int));
-
+ (formatTableSize+=16)*sizeof(short int));
+
if (!formatTable) {
fprintf(stderr,"Out of memory for format table");
exit (1);
--- src/xltypes.h
+++ src/xltypes.h
@@ -20,7 +20,7 @@
#define AUTOFILTERINFO 0x9D
#define BACKUP 0x40
#define BLANK 0x201
-#define BOF 0x809
+#define BOF8 0x809
#define BOOKBOOL 0xDA
#define BOOLERR 0x205
#define BOTTOMMARGIN 0x29
@@ -149,11 +149,21 @@
#define WRITEPROT 0x86
#define WSBOOL 0x81
#define XCT 0x59
-#define XF 0xE0
+#define XF_4P 0xE0
#define SST 0xFC
#define CONSTANT_STRING 0xFD
#define REFRESHALL 0x1B7
#define USESELFS 0x160
#define EXTSST 0xFF
/* Vitus additions */
-#define INTEGER_CELL 0x202
+#define INTEGER_CELL 0x202
+/* Tincho additions */
+#define BOF2 0x09
+#define BOF3 0x209
+#define BOF4 0x409
+#define INVALID_03 0x03
+#define INVALID_303 0x303
+#define MSODRAWING 0xEC
+#define SXFORMULA 0x103
+#define XF 0x43
+#define XF_4 0x443