Merged revisions 4859-5013,5016-5018 via svnmerge from

svn://source.netsurf-browser.org/branches/takkaria/netsurf-hubbub

........
  r4860 | takkaria | 2008-08-02 03:51:35 +0100 (Sat, 02 Aug 2008) | 2 lines
  
  A really horribly rough first go at integrating hubbub with NetSurf.  Segfaults, but I'm not sure what I've done wrong.
........
  r4861 | jmb | 2008-08-02 05:01:19 +0100 (Sat, 02 Aug 2008) | 3 lines
  
  Fix segfault caused by mismatched struct layout expectations.
  Fix warnings, too.
........
  r4862 | jmb | 2008-08-02 05:11:02 +0100 (Sat, 02 Aug 2008) | 2 lines
  
  Destroy parser as soon as it's no longer needed, and flag this so that html_destroy doesn't cause things to trample all over the heap.
........
  r4863 | takkaria | 2008-08-02 15:30:34 +0100 (Sat, 02 Aug 2008) | 2 lines
  
  Remove debugging printf()s, add Aliases file, add script { display: none; } block.
........
  r4868 | takkaria | 2008-08-02 22:14:55 +0100 (Sat, 02 Aug 2008) | 2 lines
  
  Fix a segfault bug.
........
  r4869 | takkaria | 2008-08-02 22:17:58 +0100 (Sat, 02 Aug 2008) | 2 lines
  
  Add gtk/res/Aliases as a symlink to the one in !NS/Resources
........
  r4870 | takkaria | 2008-08-02 22:26:31 +0100 (Sat, 02 Aug 2008) | 2 lines
  
  Fix symlink.
........
  r4885 | jmb | 2008-08-03 23:26:54 +0100 (Sun, 03 Aug 2008) | 2 lines
  
  Make multiple parallel parser instances work correctly.
........
  r4886 | tlsa | 2008-08-04 00:21:29 +0100 (Mon, 04 Aug 2008) | 1 line
  
  Don't display contents of STYLE.
........
  r4891 | jmb | 2008-08-04 01:18:07 +0100 (Mon, 04 Aug 2008) | 5 lines
  
  Fix reparent_children to actually work
  Make get_parent_node pay attention to the element_only flag
  Fixup node referencing when appending a text child
  Make clone_node clone attributes and namespace information in the non-deep case
........
  r4918 | jmb | 2008-08-05 15:27:03 +0100 (Tue, 05 Aug 2008) | 2 lines
  
  Fix debug target
........
  r4944 | takkaria | 2008-08-07 12:56:50 +0100 (Thu, 07 Aug 2008) | 2 lines
  
  Use talloc to allocate space for Hubbub.
........
  r4993 | takkaria | 2008-08-10 17:49:47 +0100 (Sun, 10 Aug 2008) | 2 lines
  
  Stub out the encoding change callback so NS-hubbub compiles again.
........
  r4994 | takkaria | 2008-08-10 18:02:33 +0100 (Sun, 10 Aug 2008) | 2 lines
  
  Tell Hubbub the encoding that HTTP gives us, if we have one.
........
  r5001 | takkaria | 2008-08-11 02:53:24 +0100 (Mon, 11 Aug 2008) | 2 lines
  
  First go at implementing proper <meta charset> support in NetSurf, amongst some refactoring.  Probably works, but I have no pages around to test it on.
........
  r5002 | takkaria | 2008-08-11 02:56:35 +0100 (Mon, 11 Aug 2008) | 2 lines
  
  Fix (I hope) a 64-bit compiler warning.
........
  r5012 | takkaria | 2008-08-11 08:40:28 +0100 (Mon, 11 Aug 2008) | 2 lines
  
  Fix some nits from jmb.  (Remove const from html->encoding, set encoding_source to something about right.)
........
  r5013 | takkaria | 2008-08-11 08:48:50 +0100 (Mon, 11 Aug 2008) | 2 lines
  
  Properly fix <meta charset> handling, by passing in the right thing as the context.  No idea how this worked before. :) (credit: jmb)
........
  r5017 | jmb | 2008-08-11 09:13:22 +0100 (Mon, 11 Aug 2008) | 7 lines
  
  Pedantic typo fix.
  Make Hubbub YES when building for RISC OS.
  Make Hubbub AUTO when building for GTK
  Ignore the presence of Hubbub on other platforms.
  Remove the explicit libparserutils pkg-config stuff from the makefile (see r5016)
  Add some logic that means Hubbub gets enabled correctly when building on RISC OS and when cross-compiling for it. (This is ugly and will go away when pkg-config is available on RO)
........

svn path=/trunk/netsurf/; revision=5019
This commit is contained in:
John Mark Bell 2008-08-11 08:17:48 +00:00
parent 058fcac225
commit 191d876e41
10 changed files with 870 additions and 6 deletions

302
!NetSurf/Resources/Aliases Normal file
View File

@ -0,0 +1,302 @@
# > Unicode:Files.Aliases
# Mapping of character set encoding names to their canonical form
#
# Lines starting with a '#' are comments, blank lines are ignored.
#
# Based on http://www.iana.org/assignments/character-sets and
# http://www.iana.org/assignments/ianacharset-mib
#
# Canonical Form MIBenum Aliases...
#
US-ASCII 3 iso-ir-6 ANSI_X3.4-1986 ISO_646.irv:1991 ASCII ISO646-US ANSI_X3.4-1968 us IBM367 cp367 csASCII
ISO-10646-UTF-1 27 csISO10646UTF1
ISO_646.basic:1983 28 ref csISO646basic1983
INVARIANT 29 csINVARIANT
ISO_646.irv:1983 30 iso-ir-2 irv csISO2IntlRefVersion
BS_4730 20 iso-ir-4 ISO646-GB gb uk csISO4UnitedKingdom
NATS-SEFI 31 iso-ir-8-1 csNATSSEFI
NATS-SEFI-ADD 32 iso-ir-8-2 csNATSSEFIADD
NATS-DANO 33 iso-ir-9-1 csNATSDANO
NATS-DANO-ADD 34 iso-ir-9-2 csNATSDANOADD
SEN_850200_B 35 iso-ir-10 FI ISO646-FI ISO646-SE se csISO10Swedish
SEN_850200_C 21 iso-ir-11 ISO646-SE2 se2 csISO11SwedishForNames
KS_C_5601-1987 36 iso-ir-149 KS_C_5601-1989 KSC_5601 korean csKSC56011987
ISO-2022-KR 37 csISO2022KR
EUC-KR 38 csEUCKR EUCKR
ISO-2022-JP 39 csISO2022JP
ISO-2022-JP-2 40 csISO2022JP2
ISO-2022-CN 104
ISO-2022-CN-EXT 105
JIS_C6220-1969-jp 41 JIS_C6220-1969 iso-ir-13 katakana x0201-7 csISO13JISC6220jp
JIS_C6220-1969-ro 42 iso-ir-14 jp ISO646-JP csISO14JISC6220ro
IT 22 iso-ir-15 ISO646-IT csISO15Italian
PT 43 iso-ir-16 ISO646-PT csISO16Portuguese
ES 23 iso-ir-17 ISO646-ES csISO17Spanish
greek7-old 44 iso-ir-18 csISO18Greek7Old
latin-greek 45 iso-ir-19 csISO19LatinGreek
DIN_66003 24 iso-ir-21 de ISO646-DE csISO21German
NF_Z_62-010_(1973) 46 iso-ir-25 ISO646-FR1 csISO25French
Latin-greek-1 47 iso-ir-27 csISO27LatinGreek1
ISO_5427 48 iso-ir-37 csISO5427Cyrillic
JIS_C6226-1978 49 iso-ir-42 csISO42JISC62261978
BS_viewdata 50 iso-ir-47 csISO47BSViewdata
INIS 51 iso-ir-49 csISO49INIS
INIS-8 52 iso-ir-50 csISO50INIS8
INIS-cyrillic 53 iso-ir-51 csISO51INISCyrillic
ISO_5427:1981 54 iso-ir-54 ISO5427Cyrillic1981
ISO_5428:1980 55 iso-ir-55 csISO5428Greek
GB_1988-80 56 iso-ir-57 cn ISO646-CN csISO57GB1988
GB_2312-80 57 iso-ir-58 chinese csISO58GB231280
NS_4551-1 25 iso-ir-60 ISO646-NO no csISO60DanishNorwegian csISO60Norwegian1
NS_4551-2 58 ISO646-NO2 iso-ir-61 no2 csISO61Norwegian2
NF_Z_62-010 26 iso-ir-69 ISO646-FR fr csISO69French
videotex-suppl 59 iso-ir-70 csISO70VideotexSupp1
PT2 60 iso-ir-84 ISO646-PT2 csISO84Portuguese2
ES2 61 iso-ir-85 ISO646-ES2 csISO85Spanish2
MSZ_7795.3 62 iso-ir-86 ISO646-HU hu csISO86Hungarian
JIS_C6226-1983 63 iso-ir-87 x0208 JIS_X0208-1983 csISO87JISX0208
greek7 64 iso-ir-88 csISO88Greek7
ASMO_449 65 ISO_9036 arabic7 iso-ir-89 csISO89ASMO449
iso-ir-90 66 csISO90
JIS_C6229-1984-a 67 iso-ir-91 jp-ocr-a csISO91JISC62291984a
JIS_C6229-1984-b 68 iso-ir-92 ISO646-JP-OCR-B jp-ocr-b csISO92JISC62991984b
JIS_C6229-1984-b-add 69 iso-ir-93 jp-ocr-b-add csISO93JIS62291984badd
JIS_C6229-1984-hand 70 iso-ir-94 jp-ocr-hand csISO94JIS62291984hand
JIS_C6229-1984-hand-add 71 iso-ir-95 jp-ocr-hand-add csISO95JIS62291984handadd
JIS_C6229-1984-kana 72 iso-ir-96 csISO96JISC62291984kana
ISO_2033-1983 73 iso-ir-98 e13b csISO2033
ANSI_X3.110-1983 74 iso-ir-99 CSA_T500-1983 NAPLPS csISO99NAPLPS
ISO-8859-1 4 iso-ir-100 ISO_8859-1 ISO_8859-1:1987 latin1 l1 IBM819 CP819 csISOLatin1 8859_1 ISO8859-1
ISO-8859-2 5 iso-ir-101 ISO_8859-2 ISO_8859-2:1987 latin2 l2 csISOLatin2 8859_2 ISO8859-2
T.61-7bit 75 iso-ir-102 csISO102T617bit
T.61-8bit 76 T.61 iso-ir-103 csISO103T618bit
ISO-8859-3 6 iso-ir-109 ISO_8859-3 ISO_8859-3:1988 latin3 l3 csISOLatin3 8859_3 ISO8859-3
ISO-8859-4 7 iso-ir-110 ISO_8859-4 ISO_8859-4:1988 latin4 l4 csISOLatin4 8859_4 ISO8859-4
ECMA-cyrillic 77 iso-ir-111 KOI8-E csISO111ECMACyrillic
CSA_Z243.4-1985-1 78 iso-ir-121 ISO646-CA csa7-1 ca csISO121Canadian1
CSA_Z243.4-1985-2 79 iso-ir-122 ISO646-CA2 csa7-2 csISO122Canadian2
CSA_Z243.4-1985-gr 80 iso-ir-123 csISO123CSAZ24341985gr
ISO-8859-6 9 iso-ir-127 ISO_8859-6 ISO_8859-6:1987 ECMA-114 ASMO-708 arabic csISOLatinArabic
ISO-8859-6-E 81 csISO88596E ISO_8859-6-E
ISO-8859-6-I 82 csISO88596I ISO_8859-6-I
ISO-8859-7 10 iso-ir-126 ISO_8859-7 ISO_8859-7:1987 ELOT_928 ECMA-118 greek greek8 csISOLatinGreek 8859_7 ISO8859-7
T.101-G2 83 iso-ir-128 csISO128T101G2
ISO-8859-8 11 iso-ir-138 ISO_8859-8 ISO_8859-8:1988 hebrew csISOLatinHebrew 8859_8 ISO8859-8
ISO-8859-8-E 84 csISO88598E ISO_8859-8-E
ISO-8859-8-I 85 csISO88598I ISO_8859-8-I
CSN_369103 86 iso-ir-139 csISO139CSN369103
JUS_I.B1.002 87 iso-ir-141 ISO646-YU js yu csISO141JUSIB1002
ISO_6937-2-add 14 iso-ir-142 csISOTextComm
IEC_P27-1 88 iso-ir-143 csISO143IECP271
ISO-8859-5 8 iso-ir-144 ISO_8859-5 ISO_8859-5:1988 cyrillic csISOLatinCyrillic 8859_5 ISO8859-5
JUS_I.B1.003-serb 89 iso-ir-146 serbian csISO146Serbian
JUS_I.B1.003-mac 90 macedonian iso-ir-147 csISO147Macedonian
ISO-8859-9 12 iso-ir-148 ISO_8859-9 ISO_8859-9:1989 latin5 l5 csISOLatin5 8859_9 ISO8859-9
greek-ccitt 91 iso-ir-150 csISO150 csISO150GreekCCITT
NC_NC00-10:81 92 cuba iso-ir-151 ISO646-CU csISO151Cuba
ISO_6937-2-25 93 iso-ir-152 csISO6937Add
GOST_19768-74 94 ST_SEV_358-88 iso-ir-153 csISO153GOST1976874
ISO_8859-supp 95 iso-ir-154 latin1-2-5 csISO8859Supp
ISO_10367-box 96 iso-ir-155 csISO10367Box
ISO-8859-10 13 iso-ir-157 l6 ISO_8859-10:1992 csISOLatin6 latin6 8859_10 ISO8859-10
latin-lap 97 lap iso-ir-158 csISO158Lap
JIS_X0212-1990 98 x0212 iso-ir-159 csISO159JISX02121990
DS_2089 99 DS2089 ISO646-DK dk csISO646Danish
us-dk 100 csUSDK
dk-us 101 csDKUS
JIS_X0201 15 X0201 csHalfWidthKatakana
KSC5636 102 ISO646-KR csKSC5636
ISO-10646-UCS-2 1000 csUnicode UCS-2 UCS2
ISO-10646-UCS-4 1001 csUCS4 UCS-4 UCS4
DEC-MCS 2008 dec csDECMCS
hp-roman8 2004 roman8 r8 csHPRoman8
macintosh 2027 mac csMacintosh MACROMAN MAC-ROMAN X-MAC-ROMAN
IBM037 2028 cp037 ebcdic-cp-us ebcdic-cp-ca ebcdic-cp-wt ebcdic-cp-nl csIBM037
IBM038 2029 EBCDIC-INT cp038 csIBM038
IBM273 2030 CP273 csIBM273
IBM274 2031 EBCDIC-BE CP274 csIBM274
IBM275 2032 EBCDIC-BR cp275 csIBM275
IBM277 2033 EBCDIC-CP-DK EBCDIC-CP-NO csIBM277
IBM278 2034 CP278 ebcdic-cp-fi ebcdic-cp-se csIBM278
IBM280 2035 CP280 ebcdic-cp-it csIBM280
IBM281 2036 EBCDIC-JP-E cp281 csIBM281
IBM284 2037 CP284 ebcdic-cp-es csIBM284
IBM285 2038 CP285 ebcdic-cp-gb csIBM285
IBM290 2039 cp290 EBCDIC-JP-kana csIBM290
IBM297 2040 cp297 ebcdic-cp-fr csIBM297
IBM420 2041 cp420 ebcdic-cp-ar1 csIBM420
IBM423 2042 cp423 ebcdic-cp-gr csIBM423
IBM424 2043 cp424 ebcdic-cp-he csIBM424
IBM437 2011 cp437 437 csPC8CodePage437
IBM500 2044 CP500 ebcdic-cp-be ebcdic-cp-ch csIBM500
IBM775 2087 cp775 csPC775Baltic
IBM850 2009 cp850 850 csPC850Multilingual
IBM851 2045 cp851 851 csIBM851
IBM852 2010 cp852 852 csPCp852
IBM855 2046 cp855 855 csIBM855
IBM857 2047 cp857 857 csIBM857
IBM860 2048 cp860 860 csIBM860
IBM861 2049 cp861 861 cp-is csIBM861
IBM862 2013 cp862 862 csPC862LatinHebrew
IBM863 2050 cp863 863 csIBM863
IBM864 2051 cp864 csIBM864
IBM865 2052 cp865 865 csIBM865
IBM866 2086 cp866 866 csIBM866
IBM868 2053 CP868 cp-ar csIBM868
IBM869 2054 cp869 869 cp-gr csIBM869
IBM870 2055 CP870 ebcdic-cp-roece ebcdic-cp-yu csIBM870
IBM871 2056 CP871 ebcdic-cp-is csIBM871
IBM880 2057 cp880 EBCDIC-Cyrillic csIBM880
IBM891 2058 cp891 csIBM891
IBM903 2059 cp903 csIBM903
IBM904 2060 cp904 904 csIBBM904
IBM905 2061 CP905 ebcdic-cp-tr csIBM905
IBM918 2062 CP918 ebcdic-cp-ar2 csIBM918
IBM1026 2063 CP1026 csIBM1026
EBCDIC-AT-DE 2064 csIBMEBCDICATDE
EBCDIC-AT-DE-A 2065 csEBCDICATDEA
EBCDIC-CA-FR 2066 csEBCDICCAFR
EBCDIC-DK-NO 2067 csEBCDICDKNO
EBCDIC-DK-NO-A 2068 csEBCDICDKNOA
EBCDIC-FI-SE 2069 csEBCDICFISE
EBCDIC-FI-SE-A 2070 csEBCDICFISEA
EBCDIC-FR 2071 csEBCDICFR
EBCDIC-IT 2072 csEBCDICIT
EBCDIC-PT 2073 csEBCDICPT
EBCDIC-ES 2074 csEBCDICES
EBCDIC-ES-A 2075 csEBCDICESA
EBCDIC-ES-S 2076 csEBCDICESS
EBCDIC-UK 2077 csEBCDICUK
EBCDIC-US 2078 csEBCDICUS
UNKNOWN-8BIT 2079 csUnknown8BiT
MNEMONIC 2080 csMnemonic
MNEM 2081 csMnem
VISCII 2082 csVISCII
VIQR 2083 csVIQR
KOI8-R 2084 csKOI8R
KOI8-U 2088
IBM00858 2089 CCSID00858 CP00858 PC-Multilingual-850+euro
IBM00924 2090 CCSID00924 CP00924 ebcdic-Latin9--euro
IBM01140 2091 CCSID01140 CP01140 ebcdic-us-37+euro
IBM01141 2092 CCSID01141 CP01141 ebcdic-de-273+euro
IBM01142 2093 CCSID01142 CP01142 ebcdic-dk-277+euro ebcdic-no-277+euro
IBM01143 2094 CCSID01143 CP01143 ebcdic-fi-278+euro ebcdic-se-278+euro
IBM01144 2095 CCSID01144 CP01144 ebcdic-it-280+euro
IBM01145 2096 CCSID01145 CP01145 ebcdic-es-284+euro
IBM01146 2097 CCSID01146 CP01146 ebcdic-gb-285+euro
IBM01147 2098 CCSID01147 CP01147 ebcdic-fr-297+euro
IBM01148 2099 CCSID01148 CP01148 ebcdic-international-500+euro
IBM01149 2100 CCSID01149 CP01149 ebcdic-is-871+euro
Big5-HKSCS 2101
IBM1047 2102 IBM-1047
PTCP154 2103 csPTCP154 PT154 CP154 Cyrillic-Asian
Amiga-1251 2104 Ami1251 Amiga1251 Ami-1251
KOI7-switched 2105
UNICODE-1-1 1010 csUnicode11
SCSU 1011
UTF-7 1012
UTF-16BE 1013
UTF-16LE 1014
UTF-16 1015
CESU-8 1016 csCESU-8
UTF-32 1017
UTF-32BE 1018
UTF-32LE 1019
BOCU-1 1020 csBOCU-1
UNICODE-1-1-UTF-7 103 csUnicode11UTF7
UTF-8 106 UNICODE-1-1-UTF-8 UNICODE-2-0-UTF-8 utf8
ISO-8859-13 109 8859_13 ISO8859-13
ISO-8859-14 110 iso-ir-199 ISO_8859-14:1998 ISO_8859-14 latin8 iso-celtic l8 8859_14 ISO8859-14
ISO-8859-15 111 ISO_8859-15 Latin-9 8859_15 ISO8859-15
ISO-8859-16 112 iso-ir-226 ISO_8859-16:2001 ISO_8859-16 latin10 l10
GBK 113 CP936 MS936 windows-936
GB18030 114
OSD_EBCDIC_DF04_15 115
OSD_EBCDIC_DF03_IRV 116
OSD_EBCDIC_DF04_1 117
JIS_Encoding 16 csJISEncoding
Shift_JIS 17 MS_Kanji csShiftJIS X-SJIS Shift-JIS
EUC-JP 18 csEUCPkdFmtJapanese Extended_UNIX_Code_Packed_Format_for_Japanese EUCJP
Extended_UNIX_Code_Fixed_Width_for_Japanese 19 csEUCFixWidJapanese
ISO-10646-UCS-Basic 1002 csUnicodeASCII
ISO-10646-Unicode-Latin1 1003 csUnicodeLatin1 ISO-10646
ISO-Unicode-IBM-1261 1005 csUnicodeIBM1261
ISO-Unicode-IBM-1268 1006 csUnicodeIBM1268
ISO-Unicode-IBM-1276 1007 csUnicodeIBM1276
ISO-Unicode-IBM-1264 1008 csUnicodeIBM1264
ISO-Unicode-IBM-1265 1009 csUnicodeIBM1265
ISO-8859-1-Windows-3.0-Latin-1 2000 csWindows30Latin1
ISO-8859-1-Windows-3.1-Latin-1 2001 csWindows31Latin1
ISO-8859-2-Windows-Latin-2 2002 csWindows31Latin2
ISO-8859-9-Windows-Latin-5 2003 csWindows31Latin5
Adobe-Standard-Encoding 2005 csAdobeStandardEncoding
Ventura-US 2006 csVenturaUS
Ventura-International 2007 csVenturaInternational
PC8-Danish-Norwegian 2012 csPC8DanishNorwegian
PC8-Turkish 2014 csPC8Turkish
IBM-Symbols 2015 csIBMSymbols
IBM-Thai 2016 csIBMThai
HP-Legal 2017 csHPLegal
HP-Pi-font 2018 csHPPiFont
HP-Math8 2019 csHPMath8
Adobe-Symbol-Encoding 2020 csHPPSMath
HP-DeskTop 2021 csHPDesktop
Ventura-Math 2022 csVenturaMath
Microsoft-Publishing 2023 csMicrosoftPublishing
Windows-31J 2024 csWindows31J
GB2312 2025 csGB2312 EUC-CN EUCCN CN-GB
Big5 2026 csBig5 BIG-FIVE BIG-5 CN-BIG5 BIG_FIVE
windows-1250 2250 CP1250 MS-EE
windows-1251 2251 CP1251 MS-CYRL
windows-1252 2252 CP1252 MS-ANSI
windows-1253 2253 CP1253 MS-GREEK
windows-1254 2254 CP1254 MS-TURK
windows-1255 2255
windows-1256 2256 CP1256 MS-ARAB
windows-1257 2257 CP1257 WINBALTRIM
windows-1258 2258
TIS-620 2259
HZ-GB-2312 2085
# Additional encodings not defined by IANA
# Arbitrary allocations
#CP737 3001
#CP853 3002
#CP856 3003
CP874 3004 WINDOWS-874
#CP922 3005
#CP1046 3006
#CP1124 3007
#CP1125 3008 WINDOWS-1125
#CP1129 3009
#CP1133 3010 IBM-CP1133
#CP1161 3011 IBM-1161 IBM1161 CSIBM1161
#CP1162 3012 IBM-1162 IBM1162 CSIBM1162
#CP1163 3013 IBM-1163 IBM1163 CSIBM1163
#GEORGIAN-ACADEMY 3014
#GEORGIAN-PS 3015
#KOI8-RU 3016
#KOI8-T 3017
#MACARABIC 3018 X-MAC-ARABIC MAC-ARABIC
#MACCROATIAN 3019 X-MAC-CROATIAN MAC-CROATIAN
#MACGREEK 3020 X-MAC-GREEK MAC-GREEK
#MACHEBREW 3021 X-MAC-HEBREW MAC-HEBREW
#MACICELAND 3022 X-MAC-ICELAND MAC-ICELAND
#MACROMANIA 3023 X-MAC-ROMANIA MAC-ROMANIA
#MACTHAI 3024 X-MAC-THAI MAC-THAI
#MACTURKISH 3025 X-MAC-TURKISH MAC-TURKISH
#MULELAO-1 3026
# From Unicode Lib
ISO-IR-182 4000
ISO-IR-197 4002
ISO-2022-JP-1 4008
MACCYRILLIC 4009 X-MAC-CYRILLIC MAC-CYRILLIC
MACUKRAINE 4010 X-MAC-UKRAINIAN MAC-UKRAINIAN
MACCENTRALEUROPE 4011 X-MAC-CENTRALEURROMAN MAC-CENTRALEURROMAN
JOHAB 4012
ISO-8859-11 4014 iso-ir-166 ISO_8859-11 ISO8859-11 8859_11
X-CURRENT 4999 X-SYSTEM
X-ACORN-LATIN1 5001
X-ACORN-FUZZY 5002

View File

@ -174,3 +174,5 @@ fieldset { display: block; border: thin solid #888; margin: 1.12em 0; }
[align=left] { text-align: left; }
[align=center] { text-align: center; }
[align=right] { text-align: right; }
script, style { display: none; }

View File

@ -245,6 +245,12 @@ ifeq ($(TARGET),riscos)
$(eval $(call feature_enabled,SPRITE,-DWITH_SPRITE,,RISC OS sprite rendering))
$(eval $(call feature_enabled,ARTWORKS,-DWITH_ARTWORKS,,ArtWorks rendering))
$(eval $(call feature_enabled,PLUGINS,-DWITH_PLUGIN,,Plugin protocol support))
ifeq ($(HOST),riscos)
$(eval $(call feature_enabled,HUBBUB,-DWITH_HUBBUB,-lhubbub -lparserutils,Hubbub HTML parser))
else
NETSURF_FEATURE_HUBBUB_CFLAGS := -DWITH_HUBBUB
$(eval $(call pkg_config_find_and_add,HUBBUB,libhubbub,Hubbub HTML parser))
endif
endif
# ----------------------------------------------------------------------------
@ -267,10 +273,12 @@ ifeq ($(TARGET),gtk)
# define additional CFLAGS and LDFLAGS requirements for pkg-configed libs here
NETSURF_FEATURE_RSVG_CFLAGS := -DWITH_RSVG
NETSURF_FEATURE_ROSPRITE_CFLAGS := -DWITH_NSSPRITE
NETSURF_FEATURE_HUBBUB_CFLAGS := -DWITH_HUBBUB
# add a line similar to below for each optional pkg-configed lib here
$(eval $(call pkg_config_find_and_add,RSVG,librsvg-2.0,SVG rendering))
$(eval $(call pkg_config_find_and_add,ROSPRITE,librosprite,RISC OS sprite rendering))
$(eval $(call pkg_config_find_and_add,HUBBUB,libhubbub,Hubbub HTML parser))
GTKCFLAGS := -std=c99 -Dgtk -Dnsgtk \
-DGTK_DISABLE_DEPRECATED \
@ -399,10 +407,14 @@ ifeq ($(TARGET),debug)
-D_XOPEN_SOURCE=600 \
-D_POSIX_C_SOURCE=200112L \
-D_NETBSD_SOURCE \
$(WARNFLAGS) -I. -I../../libsprite/trunk/ -g $(OPT0FLAGS) \
$(shell $(PKG_CONFIG) --cflags librosprite) \
$(WARNFLAGS) -I. -g $(OPT0FLAGS) \
$(shell xml2-config --cflags)
LDFLAGS += $(shell $(PKG_CONFIG) --libs librosprite)
LDFLAGS += $(shell $(PKG_CONFIG) --libs libxml-2.0 libcurl openssl)
$(eval $(call pkg_config_find_and_add,RSVG,librsvg-2.0,SVG rendering))
$(eval $(call pkg_config_find_and_add,ROSPRITE,librosprite,RISC OS sprite rendering))
$(eval $(call pkg_config_find_and_add,HUBBUB,libhubbub,Hubbub HTML parser))
$(eval $(call pkg_config_find_and_add,HUBBUB,libparserutils,Hubbub HTML parser))
endif
# ----------------------------------------------------------------------------

View File

@ -53,6 +53,10 @@ NETSURF_USE_LIBICONV_PLUG := YES
# ----------------------------------------------------------------------------
ifeq ($(TARGET),riscos)
# Enable using Hubbub to parse HTML rather than libxml2
# Valid options: YES, NO
NETSURF_USE_HUBBUB := YES
# Use James Bursa's libsvgtiny for rendering SVG images
# Valid options: YES, NO
NETSURF_USE_NSSVG := YES
@ -87,6 +91,10 @@ ifeq ($(TARGET),gtk)
# Where to install the netsurf binary
NETSURF_GTK_BIN := /usr/local/bin/
# Enable using Hubbub to parse HTML rather than libxml2
# Valid options: YES, NO, AUTO
NETSURF_USE_HUBBUB := AUTO
# Use librsvg in conjunction with Cairo to render SVG images
# Valid options: YES, NO, AUTO
NETSURF_USE_RSVG := AUTO

View File

@ -21,6 +21,22 @@
#include "render/font.h"
/* Prototypes for the three font callbacks that are collected in the nsfont
 * table below. */
static bool nsfont_width(const struct css_style *style,
const char *string, size_t length, int *width);
static bool nsfont_position_in_string(const struct css_style *style,
const char *string, size_t length,
int x, size_t *char_offset, int *actual_x);
static bool nsfont_split(const struct css_style *style,
const char *string, size_t length,
int x, size_t *char_offset, int *actual_x);
/* Font function table, initialised positionally with the width,
 * position-in-string and split callbacks, in that order. */
const struct font_functions nsfont = {
nsfont_width,
nsfont_position_in_string,
nsfont_split
};
bool nsfont_width(const struct css_style *style,
const char *string, size_t length,
int *width)
@ -63,3 +79,4 @@ bool nsfont_split(const struct css_style *style,
*actual_x = *char_offset * 10;
return true;
}

View File

@ -31,6 +31,9 @@
#include <gdk/gdkkeysyms.h>
#include <gtk/gtk.h>
#include <glade/glade.h>
#ifdef WITH_HUBBUB
#include <hubbub/hubbub.h>
#endif
#include "content/content.h"
#include "content/fetch.h"
#include "content/fetchers/fetch_curl.h"
@ -165,6 +168,13 @@ static void check_homedir(void)
}
}
/**
 * Allocation hook handed to hubbub_initialise(); backed by the C heap.
 *
 * \param ptr  Existing allocation to resize, or NULL for a new allocation
 * \param len  Requested size in bytes; 0 releases \a ptr
 * \param pw   Caller-supplied context pointer (unused)
 * \return Pointer to the (re)allocated block, or NULL on failure or when
 *         \a len is 0
 */
static void *myrealloc(void *ptr, size_t len, void *pw)
{
	(void) pw;

	/* realloc(ptr, 0) is implementation-defined, so release explicitly
	 * and give every platform the same answer. */
	if (len == 0) {
		free(ptr);
		return NULL;
	}

	return realloc(ptr, len);
}
void gui_init(int argc, char** argv)
{
char buf[PATH_MAX];
@ -182,6 +192,10 @@ void gui_init(int argc, char** argv)
LOG(("Using '%s' as Resources directory", buf));
res_dir_location = strdup(buf);
find_resource(buf, "Aliases", "./gtk/res/Aliases");
LOG(("Using '%s' as Aliases file", buf));
hubbub_initialise(buf, myrealloc, NULL);
glade_init();
gladeWindows = glade_xml_new(glade_file_location, NULL, NULL);
if (gladeWindows == NULL)

1
gtk/res/Aliases Symbolic link
View File

@ -0,0 +1 @@
../../!NetSurf/Resources/Aliases

View File

@ -27,6 +27,9 @@
#include <stdlib.h>
#include <sys/stat.h>
#include <time.h>
#ifdef WITH_HUBBUB
#include <hubbub/parser.h>
#endif
#include <libxml/HTMLparser.h>
#include "content/content.h"
#include "render/directory.h"
@ -45,7 +48,12 @@ bool directory_create(struct content *c, const char *params[]) {
/* html_create() must have broadcast MSG_ERROR already, so we
* don't need to. */
return false;
#ifndef WITH_HUBBUB
htmlParseChunk(c->data.html.parser, header, sizeof(header) - 1, 0);
#else
hubbub_parser_parse_chunk(c->data.html.parser,
(uint8_t *) header, sizeof(header) - 1);
#endif
return true;
}
@ -92,7 +100,11 @@ bool directory_convert(struct content *c, int width, int height) {
"<body>\n<h1>\nIndex of %s</h1>\n<hr><pre>",
nice_path, nice_path);
free(nice_path);
#ifndef WITH_HUBBUB
htmlParseChunk(c->data.html.parser, buffer, strlen(buffer), 0);
#else
hubbub_parser_parse_chunk(c->data.html.parser, buffer, strlen(buffer));
#endif
res = url_parent(c->url, &up);
if (res == URL_FUNC_OK) {
@ -100,8 +112,13 @@ bool directory_convert(struct content *c, int width, int height) {
if ((res == URL_FUNC_OK) && !compare) {
snprintf(buffer, sizeof(buffer),
"<a href=\"..\">[..]</a>\n");
#ifndef WITH_HUBBUB
htmlParseChunk(c->data.html.parser, buffer,
strlen(buffer), 0);
#else
hubbub_parser_parse_chunk(c->data.html.parser,
buffer, strlen(buffer));
#endif
}
free(up);
}
@ -118,11 +135,21 @@ bool directory_convert(struct content *c, int width, int height) {
snprintf(buffer, sizeof(buffer), "<a href=\"%s/%s\">%s</a>\n",
c->url, entry->d_name, entry->d_name);
#ifndef WITH_HUBBUB
htmlParseChunk(c->data.html.parser, buffer, strlen(buffer), 0);
#else
hubbub_parser_parse_chunk(c->data.html.parser,
buffer, strlen(buffer));
#endif
}
closedir(parent);
#ifndef WITH_HUBBUB
htmlParseChunk(c->data.html.parser, footer, sizeof(footer) - 1, 0);
#else
hubbub_parser_parse_chunk(c->data.html.parser,
(uint8_t *) footer, sizeof(footer) - 1);
#endif
c->type = CONTENT_HTML;
return html_convert(c, width, height);
}

View File

@ -20,12 +20,21 @@
* Content for text/html (implementation).
*/
#define _GNU_SOURCE /* for strndup() */
#include <assert.h>
#include <ctype.h>
#include <stdint.h>
#include <string.h>
#include <strings.h>
#include <stdlib.h>
#ifdef WITH_HUBBUB
#include <hubbub/hubbub.h>
#include <hubbub/parser.h>
#include <hubbub/tree.h>
#endif
#include <libxml/tree.h>
#include <libxml/parser.h>
#include <libxml/parserInternals.h>
#include "utils/config.h"
#include "content/content.h"
@ -87,6 +96,380 @@ static const char empty_document[] =
"</html>";
#ifdef WITH_HUBBUB
/* Number of slots in the namespace tables below, counting slot 0 which
 * carries no namespace. */
#define NUM_NAMESPACES 7
/* Namespace prefixes, index-aligned with ns_urls. Slots 0 and 1 have no
 * prefix. Both the strings and the table slots are read-only. */
const char * const ns_prefixes[NUM_NAMESPACES] =
	{ NULL, NULL, "math", "svg", "xlink", "xml", "xmlns" };
/* Namespace URIs, index-aligned with ns_prefixes. */
const char * const ns_urls[NUM_NAMESPACES] = {
	NULL,
	"http://www.w3.org/1999/xhtml",
	"http://www.w3.org/1998/Math/MathML",
	"http://www.w3.org/2000/svg",
	"http://www.w3.org/1999/xlink",
	"http://www.w3.org/XML/1998/namespace",
	"http://www.w3.org/2000/xmlns/"
};
/* libxml2 namespace objects, indexed like ns_urls / ns_prefixes. Slots 1 and
 * up are (re)filled by create_element() when it handles the first element of
 * a document; slot 0 is never assigned.
 * NOTE(review): this table is file-global while the namespaces are created
 * on one document's first element -- confirm this is safe when several
 * documents are being parsed at the same time. */
xmlNs *ns_ns[NUM_NAMESPACES];
/* Prototypes for the hubbub tree-construction callbacks implemented below.
 * They are listed in the same order as they appear in the tree_handler
 * initialiser. */
static int create_comment(void *ctx, const hubbub_string *data, void **result);
static int create_doctype(void *ctx, const hubbub_doctype *doctype,
		void **result);
static int create_element(void *ctx, const hubbub_tag *tag, void **result);
static int create_text(void *ctx, const hubbub_string *data, void **result);
static int ref_node(void *ctx, void *node);
static int unref_node(void *ctx, void *node);
static int append_child(void *ctx, void *parent, void *child, void **result);
static int insert_before(void *ctx, void *parent, void *child, void *ref_child,
		void **result);
static int remove_child(void *ctx, void *parent, void *child, void **result);
static int clone_node(void *ctx, void *node, bool deep, void **result);
static int reparent_children(void *ctx, void *node, void *new_parent);
static int get_parent(void *ctx, void *node, bool element_only, void **result);
static int has_children(void *ctx, void *node, bool *result);
static int form_associate(void *ctx, void *form, void *node);
static int add_attributes(void *ctx, void *node,
		const hubbub_attribute *attributes, uint32_t n_attributes);
static int set_quirks_mode(void *ctx, hubbub_quirks_mode mode);
/* The second argument is a charset name, matching the definition below. */
static int change_encoding(void *ctx, const char *name);
/* Template table of tree-construction callbacks, initialised positionally.
 * html_create_parser() copies it into the content's own handler and sets
 * that copy's ctx member to the owning content.
 * NOTE(review): the trailing NULL is presumably the ctx member -- confirm
 * against the hubbub_tree_handler declaration. */
static hubbub_tree_handler tree_handler = {
create_comment,
create_doctype,
create_element,
create_text,
ref_node,
unref_node,
append_child,
insert_before,
remove_child,
clone_node,
reparent_children,
get_parent,
has_children,
form_associate,
add_attributes,
set_quirks_mode,
change_encoding,
NULL
};
/*** Tree construction functions ***/
/**
 * Tree callback: create a libxml2 comment node from a hubbub string.
 *
 * \param ctx     Callback context (unused)
 * \param data    Comment text, given as pointer plus length
 * \param result  Receives the new node holding one reference, or NULL on
 *                failure
 * \return 0 on success, 1 if the node or its text could not be allocated
 */
int create_comment(void *ctx, const hubbub_string *data, void **result)
{
	xmlNode *node;

	(void) ctx;

	*result = NULL;

	node = xmlNewComment(NULL);
	if (node == NULL) {
		return 1;
	}

	/* A NULL source pointer simply leaves the comment without content;
	 * only a failed copy of real data counts as an error. */
	if (data->ptr != NULL) {
		node->content = xmlStrndup(data->ptr, data->len);
		if (node->content == NULL) {
			xmlFreeNode(node);
			return 1;
		}
	}

	/* The reference count is kept in _private; start with one reference */
	node->_private = (void *)1;
	*result = node;

	return 0;
}
/**
 * Tree callback: create a placeholder node for a doctype.
 *
 * \param ctx      Callback context (unused)
 * \param doctype  Doctype description (unused)
 * \param result   Receives the placeholder node holding one reference, or
 *                 NULL on failure
 * \return 0 on success, 1 if the node could not be allocated
 */
int create_doctype(void *ctx, const hubbub_doctype *doctype, void **result)
{
	xmlNode *node;

	(void) ctx;
	(void) doctype;

	*result = NULL;

	/* No real doctype node is built: an empty comment stands in for it.
	 * The original author's intent is that it is not appended later. */
	node = xmlNewComment(NULL);
	if (node == NULL) {
		return 1;
	}

	node->_private = (void *)1;
	*result = node;

	return 0;
}
/**
 * Tree callback: create a libxml2 element node, with its namespace and
 * attributes, from a hubbub tag.
 *
 * \param ctx     The owning struct content
 * \param tag     Tag name, namespace index and attribute list
 * \param result  Receives the new element holding one reference, or NULL on
 *                failure
 * \return 0 on success, 1 if the element or an attribute could not be
 *         created
 */
int create_element(void *ctx, const hubbub_tag *tag, void **result)
{
	struct content *c = ctx;
	struct content_html_data *html = &c->data.html;
	bool registered_ns = false;
	xmlNode *node;
	char *name;

	*result = NULL;

	/* hubbub strings carry an explicit length, so take a terminated copy
	 * for libxml2 */
	name = strndup((const char *) tag->name.ptr, tag->name.len);
	if (name == NULL) {
		return 1;
	}

	node = xmlNewNode(NULL, BAD_CAST name);
	free(name);
	if (node == NULL) {
		return 1;
	}

	node->_private = (void *)1;

	/* The first element of a document declares every known namespace;
	 * the resulting objects are cached in ns_ns for later nodes. */
	if (html->firstelem == true) {
		for (size_t i = 1; i < NUM_NAMESPACES; i++) {
			ns_ns[i] = xmlNewNs(node,
					BAD_CAST ns_urls[i],
					BAD_CAST ns_prefixes[i]);
		}
		html->firstelem = false;
		registered_ns = true;
	}

	xmlSetNs(node, ns_ns[tag->ns]);

	if (add_attributes(ctx, node, tag->attributes,
			tag->n_attributes) != 0) {
		/* Freeing the node also frees namespaces declared on it, so
		 * have the next element declare them afresh. */
		if (registered_ns) {
			html->firstelem = true;
		}
		xmlFreeNode(node);
		return 1;
	}

	*result = node;

	return 0;
}
/**
 * Tree callback: create a libxml2 text node from a hubbub string.
 *
 * \param ctx     Callback context (unused)
 * \param data    Text content, given as pointer plus length
 * \param result  Receives the new node holding one reference, or NULL on
 *                failure
 * \return 0 on success, 1 if the node could not be allocated
 */
int create_text(void *ctx, const hubbub_string *data, void **result)
{
	xmlNode *node;

	(void) ctx;

	*result = NULL;

	node = xmlNewTextLen(BAD_CAST data->ptr, data->len);
	if (node == NULL) {
		return 1;
	}

	node->_private = (void *)1;
	*result = node;

	return 0;
}
/**
 * Tree callback: take one more reference on a node.
 *
 * The count is stored as an integer in the node's _private pointer.
 *
 * \param ctx   Callback context (unused)
 * \param node  The xmlNode to reference
 * \return 0 always
 */
int ref_node(void *ctx, void *node)
{
	xmlNode *target = node;
	uintptr_t refs = (uintptr_t) target->_private;

	(void) ctx;

	target->_private = (void *) (refs + 1);

	return 0;
}
/**
 * Tree callback: drop one reference on a node.
 *
 * The node is freed only when the count reaches zero and the node has no
 * parent.
 *
 * \param ctx   Callback context (unused)
 * \param node  The xmlNode to release
 * \return 0 always
 */
int unref_node(void *ctx, void *node)
{
	xmlNode *target = node;
	uintptr_t refs = (uintptr_t) target->_private - 1;

	(void) ctx;

	target->_private = (void *) refs;

	if (refs == 0 && target->parent == NULL) {
		xmlFreeNode(target);
	}

	return 0;
}
/**
 * Tree callback: append a child to a parent node.
 *
 * \param ctx     Callback context, passed on to the helper callbacks
 * \param parent  The xmlNode to append to
 * \param child   The xmlNode to append
 * \param result  Receives the node now in the tree, with a reference taken
 *                for the caller, or NULL on failure
 * \return 0 on success, 1 if the child could not be cloned or appended
 */
int append_child(void *ctx, void *parent, void *child, void **result)
{
	xmlNode *nparent = parent;
	xmlNode *nchild = child;
	xmlNode *added;

	*result = NULL;

	if (nchild->type == XML_TEXT_NODE &&
			nparent->last != NULL &&
			nparent->last->type == XML_TEXT_NODE) {
		/* xmlAddChild() merges adjacent text nodes and frees the node
		 * it was given, so hand it a copy and leave the caller's
		 * child intact. */
		xmlNode *previous = nparent->last;
		xmlNode *clone;
		void *copy = NULL;

		if (clone_node(ctx, nchild, false, &copy) != 0 ||
				copy == NULL) {
			return 1;
		}
		clone = copy;

		added = xmlAddChild(nparent, clone);
		if (added == NULL) {
			xmlFreeNode(clone);
			return 1;
		}

		/* After a merge the clone, and the reference it carried, are
		 * gone and the existing text node is returned instead: take
		 * the caller's reference on that node. Otherwise the clone's
		 * own initial reference is the one handed back. */
		if (added == previous) {
			ref_node(ctx, added);
		}
	} else {
		added = xmlAddChild(nparent, nchild);
		if (added == NULL) {
			return 1;
		}
		ref_node(ctx, added);
	}

	*result = added;

	return 0;
}
/* insert 'child' before 'ref_child', under 'parent' */
int insert_before(void *ctx, void *parent, void *child, void *ref_child,
void **result)
{
*result = xmlAddPrevSibling(ref_child, child);
ref_node(ctx, *result);
return 0;
}
int remove_child(void *ctx, void *parent, void *child, void **result)
{
xmlUnlinkNode(child);
*result = child;
ref_node(ctx, *result);
return 0;
}
/**
 * Tree callback: copy a node.
 *
 * \param ctx     Callback context (unused)
 * \param node    The xmlNode to copy
 * \param deep    true to copy the node's children as well
 * \param result  Receives the copy holding one reference, or NULL on failure
 * \return 0 on success, 1 if libxml2 could not produce the copy
 */
int clone_node(void *ctx, void *node, bool deep, void **result)
{
	xmlNode *copy;

	(void) ctx;

	*result = NULL;

	/* xmlCopyNode() mode 1 copies recursively; mode 2 copies just the
	 * node with its properties and namespaces. */
	copy = xmlCopyNode(node, deep ? 1 : 2);
	if (copy == NULL) {
		return 1;
	}

	copy->_private = (void *)1;
	*result = copy;

	return 0;
}
/* Take all of the child nodes of "node" and append them to "new_parent" */
int reparent_children(void *ctx, void *node, void *new_parent)
{
xmlNode *n = (xmlNode *) node;
xmlNode *p = (xmlNode *) new_parent;
for (xmlNode *child = n->children; child != NULL; ) {
xmlNode *next = child->next;
xmlUnlinkNode(child);
if (xmlAddChild(p, child) == NULL)
return 1;
child = next;
}
return 0;
}
int get_parent(void *ctx, void *node, bool element_only, void **result)
{
*result = ((xmlNode *)node)->parent;
if (*result != NULL && element_only &&
((xmlNode *) *result)->type != XML_ELEMENT_NODE)
*result = NULL;
if (*result != NULL)
ref_node(ctx, *result);
return 0;
}
int has_children(void *ctx, void *node, bool *result)
{
*result = ((xmlNode *)node)->children ? true : false;
return 0;
}
/**
 * Tree callback: associate a node with a form. Currently a no-op.
 *
 * \param ctx   Callback context (unused)
 * \param form  The form node (unused)
 * \param node  The node to associate (unused)
 * \return 0 always
 */
int form_associate(void *ctx, void *form, void *node)
{
	(void) ctx;
	(void) form;
	(void) node;

	return 0;
}
int add_attributes(void *ctx, void *node,
const hubbub_attribute *attributes, uint32_t n_attributes)
{
for (size_t i = 0; i < n_attributes; i++) {
const hubbub_attribute *attr = &attributes[i];
char *name = strndup((const char *) attr->name.ptr,
attr->name.len);
char *value = strndup((const char *) attr->value.ptr,
attr->value.len);
if (attr->ns == HUBBUB_NS_NULL) {
xmlNewProp(node, BAD_CAST name, BAD_CAST value);
} else {
xmlNewNsProp(node, ns_ns[attr->ns], BAD_CAST name,
BAD_CAST value);
}
free(name);
free(value);
}
return 0;
}
/**
 * Tree callback: record the document's quirks mode. Currently a no-op.
 *
 * \param ctx   Callback context (unused)
 * \param mode  The quirks mode reported by the parser (unused)
 * \return 0 always
 */
int set_quirks_mode(void *ctx, hubbub_quirks_mode mode)
{
	(void) ctx;
	(void) mode;

	return 0;
}
/**
 * Tree callback: the document asks for a different character encoding.
 *
 * \param ctx   The owning struct content
 * \param name  Name of the requested encoding
 * \return 0 if parsing can continue with the current encoding, 1 if the
 *         requested encoding differs from the one in use
 */
int change_encoding(void *ctx, const char *name)
{
	struct content *c = ctx;
	struct content_html_data *html = &c->data.html;
	const char *charset;
	uint32_t source;

	/* An encoding that is already recorded is one we are *certain* of */
	if (html->encoding != NULL) {
		return 0;
	}

	/* Otherwise ask the parser how confident it is (a confident answer
	 * can only have come from a BOM) */
	charset = hubbub_parser_read_charset(html->parser, &source);

	if (source == HUBBUB_CHARSET_CONFIDENT) {
		html->encoding_source = ENCODING_SOURCE_DETECTED;
		html->encoding = (char *) charset;
		return 0;
	}

	/* From here on the current encoding is only tentative.
	 * http://www.whatwg.org/specs/web-apps/current-work/#change
	 * 2. "If the new encoding is identical or equivalent to the encoding
	 * that is already being used to interpret the input stream, then set
	 * the confidence to confident and abort these steps."
	 *
	 * The requested encoding is recorded either way: it is needed for
	 * reprocessing with a different charset, and it also confirms the
	 * charset when it was already correct. */
	html->encoding = (char *) name;
	html->encoding_source = ENCODING_SOURCE_META;

	/* Equal encodings will have the same string pointers */
	if (charset == name) {
		return 0;
	}

	return 1;
}
/**
 * Allocation hook for Hubbub, backed by talloc.
 *
 * \param ptr  Existing allocation, passed through to talloc
 * \param len  Requested size in bytes
 * \param pw   Used as the talloc context for the allocation
 * \return Whatever talloc_realloc_size() returns
 */
static void *html_hubbub_realloc(void *ptr, size_t len, void *pw)
{
	void *resized = talloc_realloc_size(pw, ptr, len);

	return resized;
}
/**
 * Create a Hubbub parser for a content, together with the libxml2 document
 * it builds into, and connect the two.
 *
 * \param c  The content to set up
 * \return 0 on success, 1 if the parser or the document could not be created
 */
static int html_create_parser(struct content *c)
{
	struct content_html_data *html = &c->data.html;
	hubbub_parser_optparams opt;

	/* The content doubles as the context handed to the allocation hook */
	html->parser = hubbub_parser_create(html->encoding,
			html_hubbub_realloc,
			c);
	if (html->parser == NULL) {
		return 1;
	}

	html->document = xmlNewDoc(BAD_CAST "1.0");
	if (html->document == NULL) {
		return 1;
	}

	/* Give this content its own copy of the callback table, with the
	 * content itself as the callback context */
	html->tree_handler = tree_handler;
	html->tree_handler.ctx = c;

	opt.tree_handler = &html->tree_handler;
	hubbub_parser_setopt(html->parser, HUBBUB_PARSER_TREE_HANDLER, &opt);

	opt.document_node = html->document;
	hubbub_parser_setopt(html->parser, HUBBUB_PARSER_DOCUMENT_NODE, &opt);

	return 0;
}
#endif
/**
* Create a CONTENT_HTML.
*
@ -101,6 +484,10 @@ bool html_create(struct content *c, const char *params[])
union content_msg_data msg_data;
html->parser = 0;
#ifdef WITH_HUBBUB
html->document = 0;
html->firstelem = true;
#endif
html->encoding_handler = 0;
html->encoding = 0;
html->getenc = true;
@ -135,16 +522,26 @@ bool html_create(struct content *c, const char *params[])
}
}
#ifndef WITH_HUBBUB
html->parser = htmlCreatePushParserCtxt(0, 0, "", 0, 0,
XML_CHAR_ENCODING_NONE);
if (!html->parser)
goto no_memory;
#else
/* Set up the parser, libxml2 document, and that */
if (html_create_parser(c) != 0)
goto no_memory;
#endif
#ifndef WITH_HUBBUB
if (html->encoding) {
/* an encoding was specified in the Content-Type header */
if (!html_set_parser_encoding(c, html->encoding))
return false;
}
#endif
return true;
@ -165,6 +562,7 @@ bool html_process_data(struct content *c, char *data, unsigned int size)
{
unsigned long x;
#ifndef WITH_HUBBUB
if (c->data.html.getenc) {
/* No encoding was specified in the Content-Type header.
* Attempt to detect if the encoding is not 8-bit. If the
@ -190,13 +588,36 @@ bool html_process_data(struct content *c, char *data, unsigned int size)
if (size == 0)
return true;
}
#endif
#ifdef WITH_HUBBUB
hubbub_error err;
#endif
for (x = 0; x + CHUNK <= size; x += CHUNK) {
#ifdef WITH_HUBBUB
err = hubbub_parser_parse_chunk(
c->data.html.parser, data + x, CHUNK);
if (err == HUBBUB_ENCODINGCHANGE) {
goto encoding_change;
}
#else
htmlParseChunk(c->data.html.parser, data + x, CHUNK, 0);
#endif
gui_multitask();
}
htmlParseChunk(c->data.html.parser, data + x, (int) (size - x), 0);
#ifdef WITH_HUBBUB
err = hubbub_parser_parse_chunk(
c->data.html.parser, data + x, (size - x));
if (err == HUBBUB_ENCODINGCHANGE) {
goto encoding_change;
}
#else
htmlParseChunk(c->data.html.parser, data + x, (int) (size - x), 0);
#endif
#ifndef WITH_HUBBUB
if (!c->data.html.encoding && c->data.html.parser->input->encoding) {
/* The encoding was not in headers or detected,
* and the parser found a <meta http-equiv="content-type"
@ -259,8 +680,36 @@ bool html_process_data(struct content *c, char *data, unsigned int size)
if (!html_process_data(c, c->source_data, c->source_size))
return false;
}
#endif
return true;
#ifdef WITH_HUBBUB
encoding_change:
/* Free up hubbub, libxml2 etc */
hubbub_parser_destroy(c->data.html.parser);
if (c->data.html.document) {
xmlFreeDoc(c->data.html.document);
}
/* Set up the parser, libxml2 document, and that */
if (html_create_parser(c) != 0) {
union content_msg_data msg_data;
msg_data.error = messages_get("NoMemory");
content_broadcast(c, CONTENT_MSG_ERROR, msg_data);
return false;
}
/* Recurse to reprocess all that data. This is safe because
* the encoding is now specified at parser-start which means
* it cannot be changed again. */
return html_process_data(c, c->source_data, c->source_size);
#endif
}
@ -274,6 +723,7 @@ bool html_process_data(struct content *c, char *data, unsigned int size)
bool html_set_parser_encoding(struct content *c, const char *encoding)
{
#ifndef WITH_HUBBUB
struct content_html_data *html = &c->data.html;
xmlError *error;
char error_message[500];
@ -322,6 +772,7 @@ bool html_set_parser_encoding(struct content *c, const char *encoding)
/* Ensure no one else attempts to reset the encoding */
html->getenc = false;
#endif
return true;
}
@ -412,14 +863,28 @@ bool html_convert(struct content *c, int width, int height)
/* finish parsing */
if (c->source_size == 0)
#ifndef WITH_HUBBUB
htmlParseChunk(c->data.html.parser, empty_document,
sizeof empty_document, 0);
#else
hubbub_parser_parse_chunk(c->data.html.parser,
(uint8_t *) empty_document,
sizeof empty_document);
#endif
#ifndef WITH_HUBBUB
htmlParseChunk(c->data.html.parser, "", 0, 1);
document = c->data.html.parser->myDoc;
/*xmlDebugDumpDocument(stderr, c->data.html.parser->myDoc);*/
htmlFreeParserCtxt(c->data.html.parser);
c->data.html.parser = 0;
#else
hubbub_parser_completed(c->data.html.parser);
hubbub_parser_destroy(c->data.html.parser);
c->data.html.parser = 0;
document = c->data.html.document;
/*xmlDebugDumpDocument(stderr, document);*/
#endif
if (!document) {
LOG(("Parsing failed"));
msg_data.error = messages_get("ParsingFail");
@ -1733,7 +2198,11 @@ void html_destroy(struct content *c)
}
if (c->data.html.parser)
#ifndef WITH_HUBBUB
htmlFreeParserCtxt(c->data.html.parser);
#else
hubbub_parser_destroy(c->data.html.parser);
#endif
/* Free base target */
if (c->data.html.base_target) {

View File

@ -26,6 +26,10 @@
#define _NETSURF_RENDER_HTML_H_
#include <stdbool.h>
#ifdef WITH_HUBBUB
#include <hubbub/parser.h>
#include <hubbub/tree.h>
#endif
#include <libxml/HTMLparser.h>
#include "content/content_type.h"
#include "css/css.h"
@ -114,11 +118,19 @@ struct content_html_iframe {
/** Data specific to CONTENT_HTML. */
struct content_html_data {
#ifndef WITH_HUBBUB
htmlParserCtxt *parser; /**< HTML parser context. */
#else
hubbub_parser *parser; /**< HTML parser context. */
hubbub_tree_handler tree_handler;
xmlDoc *document;
bool firstelem;
#endif
/** HTML parser encoding handler. */
xmlCharEncodingHandler *encoding_handler;
char *encoding; /**< Encoding of source, 0 if unknown. */
char *encoding; /**< Encoding of source, 0 if unknown. */
enum { ENCODING_SOURCE_HEADER, ENCODING_SOURCE_DETECTED,
ENCODING_SOURCE_META } encoding_source;
/**< Source of encoding information. */