blob: f2c8d9e6af524a85f5826273aa4eea6b2b18bf33 [file] [log] [blame]
# Copyright 2016 The Chromium Authors
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
Copybara854996b2021-09-07 19:36:02 +00004
5"""Tests for the filecontent module."""
6from __future__ import print_function
7from __future__ import division
8from __future__ import absolute_import
9
Adrià Vilanova Martínezf19ea432024-01-23 20:20:52 +010010import six
Copybara854996b2021-09-07 19:36:02 +000011import unittest
12
13from framework import filecontent
14
15
class MimeTest(unittest.TestCase):
  """Tests for filecontent.GuessContentTypeFromFilename()."""

  # Maps a file extension to the content type the tests expect for it.
  # Note that the HTML extensions are expected to map to text/plain.
  _TEST_EXTENSIONS_TO_CTYPES = {
      'html': 'text/plain',
      'htm': 'text/plain',
      'jpg': 'image/jpeg',
      'jpeg': 'image/jpeg',
      'pdf': 'application/pdf',
  }

  # Source-code style extensions that the tests expect to be text/plain.
  _CODE_EXTENSIONS = [
      'py', 'java', 'mf', 'bat', 'sh', 'php', 'vb', 'pl', 'sql',
      'patch', 'diff',
  ]

  def _AssertContentType(self, filename, expected_ctype):
    """Assert that filename is guessed to have expected_ctype.

    The failure message spells out the file name and both content types so
    that a failure inside a data-driven loop identifies the offending case.

    Args:
      filename: string file name passed to GuessContentTypeFromFilename().
      expected_ctype: string content type that the call should return.
    """
    actual_ctype = filecontent.GuessContentTypeFromFilename(filename)
    self.assertEqual(
        actual_ctype, expected_ctype,
        msg='GuessContentTypeFromFilename(%r) returned %r, expected %r' % (
            filename, actual_ctype, expected_ctype))

  def testCommonExtensions(self):
    """Tests some common extensions for their expected content types."""
    for ext, ctype in self._TEST_EXTENSIONS_TO_CTYPES.items():
      self._AssertContentType('file.%s' % ext, ctype)

  def testCaseDoesNotMatter(self):
    """Ensure that case (upper/lower) of extension does not matter."""
    for ext, ctype in self._TEST_EXTENSIONS_TO_CTYPES.items():
      self._AssertContentType('file.%s' % ext.upper(), ctype)

    for ext in self._CODE_EXTENSIONS:
      self._AssertContentType('code.%s' % ext.upper(), 'text/plain')

  def testCodeIsText(self):
    """Ensure that code extensions are text/plain."""
    for ext in self._CODE_EXTENSIONS:
      self._AssertContentType('code.%s' % ext, 'text/plain')

  def testNoExtensionIsText(self):
    """Ensure that a file name without an extension is text/plain."""
    self._AssertContentType('noextension', 'text/plain')

  def testUnknownExtension(self):
    """Ensure that an obviously unknown extension is reported as binary."""
    self._AssertContentType('f.madeupextension', 'application/octet-stream')

  def testNoShockwaveFlash(self):
    """Ensure that Shockwave files will NOT be served w/ that content type."""
    self._AssertContentType('bad.swf', 'application/octet-stream')
77
78
class DecodeFileContentsTest(unittest.TestCase):
  """Tests for filecontent.DecodeFileContents().

  The tests read the call's result as a 3-tuple whose second item is the
  "is binary" flag and whose third item is the "is long" flag.
  """

  # Text containing characters outside ASCII; the tests encode it as
  # iso-8859-1 before handing it to DecodeFileContents().
  _UNICODE_STR = (
      u'Some non-ascii chars - '
      u'\xa2\xfa\xb6\xe7\xfc\xea\xd0\xf4\xe6\xf0\xce\xf6\xbe')

  # Paths for which binary-looking contents are still expected to be binary.
  _UNHELPFUL_PATHS = (None, '', 'foo', 'foo.bin', 'foo.zzz')

  # Paths for which binary-looking contents are expected to be text.
  _TEXT_PATHS = ('a/b/Makefile.in', 'README', 'a/file.js', 'b.txt')

  def _BinaryBytes(self):
    """Return one byte string holding the byte values 122 through 251.

    bytes(bytearray(...)) produces the same byte string on Python 2 and
    Python 3, so no interpreter-version branch is needed.
    """
    return bytes(bytearray(range(122, 252)))

  def _OverlongLine(self):
    """Return an encoded line one char over _MAX_SOURCE_LINE_LEN_LOWER."""
    limit = filecontent._MAX_SOURCE_LINE_LEN_LOWER
    return (self._UNICODE_STR * 100)[:limit + 1].encode('iso-8859-1')

  def _IsBinaryForPath(self, contents, path):
    """Return the is_binary flag of DecodeFileContents(contents, path=path)."""
    return filecontent.DecodeFileContents(contents, path=path)[1]

  def IsBinary(self, contents):
    """Return the is_binary flag of DecodeFileContents(contents)."""
    _contents, is_binary, _is_long = (
        filecontent.DecodeFileContents(contents))
    return is_binary

  def testFileIsBinaryEmpty(self):
    """Empty contents are not binary."""
    self.assertFalse(self.IsBinary(b''))

  def testFileIsBinaryShortText(self):
    """A short ASCII sentence is not binary."""
    self.assertFalse(self.IsBinary(b'This is some plain text.'))

  def testLineLengthDetection(self):
    """The mix of short and over-long lines decides text vs. binary."""
    short_line = self._UNICODE_STR.encode('iso-8859-1')
    long_line = self._OverlongLine()

    lines = [short_line] * 100
    lines.append(long_line)

    # 100 short lines and a single line just over the lower limit - text.
    self.assertFalse(self.IsBinary(b'\n'.join(lines)))

    lines.extend([long_line] * 99)

    # 100 short lines and 100 lines over the lower limit - binary.
    self.assertTrue(self.IsBinary(b'\n'.join(lines)))

    # Single line too long - binary.
    lines = [short_line] * 100
    lines.append(short_line * 100)  # Very long line
    self.assertTrue(self.IsBinary(b'\n'.join(lines)))

  def testFileIsBinaryLongText(self):
    """Plain ASCII text is not binary, whether on many lines or one."""
    self.assertFalse(self.IsBinary(b'This is plain text. \n' * 100))
    # A single long line of plain text is still OK.
    self.assertFalse(self.IsBinary(b'This one long line. ' * 100))

  def testFileIsBinaryLongBinary(self):
    """A long run of mostly non-ASCII byte values is binary."""
    self.assertTrue(self.IsBinary(self._BinaryBytes() * 100))

  def testFileIsTextByPath(self):
    """A text-like path makes binary-looking contents count as text."""
    bin_string = self._BinaryBytes() * 100
    long_line = self._OverlongLine()

    for contents in [bin_string, long_line]:
      for path in self._UNHELPFUL_PATHS:
        self.assertTrue(
            self._IsBinaryForPath(contents, path),
            msg='expected binary for path=%r' % (path,))
      for path in self._TEXT_PATHS:
        self.assertFalse(
            self._IsBinaryForPath(contents, path),
            msg='expected text for path=%r' % (path,))

  def testFileIsBinaryByCommonExtensions(self):
    """Well-known extensions decide text vs. binary for the same contents."""
    contents = b'this is not examined'

    binary_paths = [
        'junk.zip',
        'JUNK.ZIP',
        '/build/HelloWorld.o',
        '/build/Hello.class',
        '/trunk/libs.old/swing.jar',
    ]
    for path in binary_paths:
      self.assertTrue(
          self._IsBinaryForPath(contents, path),
          msg='expected binary for path=%r' % (path,))

    text_paths = [
        'HelloWorld.cc',
        'Hello.java',
        'README',
        'READ.ME',
        'README.txt',
        'README.TXT',
        '/trunk/src/com/monorail/Hello.java',
        '/branches/1.2/resource.el',
        '/wiki/PageName.wiki',
    ]
    for path in text_paths:
      self.assertFalse(
          self._IsBinaryForPath(contents, path),
          msg='expected text for path=%r' % (path,))

  def testUnreasonablyLongFile(self):
    """Contents with more than SOURCE_FILE_MAX_LINES lines are long."""
    contents = b'\n' * (filecontent.SOURCE_FILE_MAX_LINES + 2)
    _contents, is_binary, is_long = filecontent.DecodeFileContents(
        contents)
    self.assertFalse(is_binary)
    self.assertTrue(is_long)

    contents = b'\n' * 100
    _contents, is_binary, is_long = filecontent.DecodeFileContents(
        contents)
    self.assertFalse(is_binary)
    self.assertFalse(is_long)