Browse Source

2017-12-10 列表页 ok

jtahstu 1 year ago
parent
commit
aa100bd1a8

+ 4 - 0
.idea/misc.xml

@@ -0,0 +1,4 @@
1
+<?xml version="1.0" encoding="UTF-8"?>
2
+<project version="4">
3
+  <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.6.0 (/Library/Frameworks/Python.framework/Versions/3.6/bin/python3.6)" project-jdk-type="Python SDK" />
4
+</project>

+ 8 - 0
.idea/modules.xml

@@ -0,0 +1,8 @@
1
+<?xml version="1.0" encoding="UTF-8"?>
2
+<project version="4">
3
+  <component name="ProjectModuleManager">
4
+    <modules>
5
+      <module fileurl="file://$PROJECT_DIR$/.idea/www_zhipin_com.iml" filepath="$PROJECT_DIR$/.idea/www_zhipin_com.iml" />
6
+    </modules>
7
+  </component>
8
+</project>

+ 359 - 0
.idea/workspace.xml

@@ -0,0 +1,359 @@
1
+<?xml version="1.0" encoding="UTF-8"?>
2
+<project version="4">
3
+  <component name="ChangeListManager">
4
+    <list default="true" id="28855cfc-f511-4773-979a-c721c4e672b8" name="Default" comment="" />
5
+    <option name="EXCLUDED_CONVERTED_TO_IGNORED" value="true" />
6
+    <option name="TRACKING_ENABLED" value="true" />
7
+    <option name="SHOW_DIALOG" value="false" />
8
+    <option name="HIGHLIGHT_CONFLICTS" value="true" />
9
+    <option name="HIGHLIGHT_NON_ACTIVE_CHANGELIST" value="false" />
10
+    <option name="LAST_RESOLUTION" value="IGNORE" />
11
+  </component>
12
+  <component name="CoverageDataManager">
13
+    <SUITE FILE_PATH="coverage/www_zhipin_com$detail.coverage" NAME="detail Coverage Results" MODIFIED="1512838084253" SOURCE_PROVIDER="com.intellij.coverage.DefaultCoverageFileProvider" RUNNER="coverage.py" COVERAGE_BY_TEST_ENABLED="true" COVERAGE_TRACING_ENABLED="false" WORKING_DIRECTORY="$PROJECT_DIR$/www_zhipin_com/analyse" />
14
+  </component>
15
+  <component name="FileColors">
16
+    <fileColor scope="Non-Project Files (Material Default)" color="2E3C43" />
17
+    <fileColor scope="Non-Project Files (Material Darker)" color="323232" />
18
+    <fileColor scope="Non-Project Files (Material Lighter)" color="eae8e8" />
19
+    <fileColor scope="Non-Project Files (Material Palenight)" color="2f2e43" />
20
+  </component>
21
+  <component name="FileEditorManager">
22
+    <leaf SIDE_TABS_SIZE_LIMIT_KEY="300">
23
+      <file leaf-file-name="zhipin_spider.py" pinned="false" current-in-tab="false">
24
+        <entry file="file://$PROJECT_DIR$/www_zhipin_com/spiders/zhipin_spider.py">
25
+          <provider selected="true" editor-type-id="text-editor">
26
+            <state relative-caret-position="-1040">
27
+              <caret line="8" column="88" lean-forward="true" selection-start-line="7" selection-start-column="4" selection-end-line="8" selection-end-column="88" />
28
+              <folding>
29
+                <element signature="e#24#37#0" expanded="true" />
30
+              </folding>
31
+            </state>
32
+          </provider>
33
+        </entry>
34
+      </file>
35
+      <file leaf-file-name="settings.py" pinned="false" current-in-tab="false">
36
+        <entry file="file://$PROJECT_DIR$/www_zhipin_com/settings.py">
37
+          <provider selected="true" editor-type-id="text-editor">
38
+            <state relative-caret-position="280">
39
+              <caret line="81" column="0" lean-forward="true" selection-start-line="81" selection-start-column="0" selection-end-line="81" selection-end-column="0" />
40
+              <folding />
41
+            </state>
42
+          </provider>
43
+        </entry>
44
+      </file>
45
+      <file leaf-file-name="items.json" pinned="false" current-in-tab="true">
46
+        <entry file="file://$PROJECT_DIR$/items.json">
47
+          <provider selected="true" editor-type-id="text-editor">
48
+            <state relative-caret-position="240">
49
+              <caret line="10" column="28" lean-forward="true" selection-start-line="10" selection-start-column="28" selection-end-line="10" selection-end-column="28" />
50
+              <folding />
51
+            </state>
52
+          </provider>
53
+        </entry>
54
+      </file>
55
+      <file leaf-file-name="items.py" pinned="false" current-in-tab="false">
56
+        <entry file="file://$PROJECT_DIR$/www_zhipin_com/items.py">
57
+          <provider selected="true" editor-type-id="text-editor">
58
+            <state relative-caret-position="360">
59
+              <caret line="15" column="33" lean-forward="false" selection-start-line="15" selection-start-column="33" selection-end-line="15" selection-end-column="33" />
60
+              <folding />
61
+            </state>
62
+          </provider>
63
+        </entry>
64
+      </file>
65
+      <file leaf-file-name="detail.py" pinned="false" current-in-tab="false">
66
+        <entry file="file://$PROJECT_DIR$/www_zhipin_com/analyse/detail.py">
67
+          <provider selected="true" editor-type-id="text-editor">
68
+            <state relative-caret-position="168">
69
+              <caret line="12" column="10" lean-forward="false" selection-start-line="12" selection-start-column="10" selection-end-line="12" selection-end-column="10" />
70
+              <folding>
71
+                <element signature="e#130#155#0" expanded="true" />
72
+              </folding>
73
+            </state>
74
+          </provider>
75
+        </entry>
76
+      </file>
77
+    </leaf>
78
+  </component>
79
+  <component name="FileTemplateManagerImpl">
80
+    <option name="RECENT_TEMPLATES">
81
+      <list>
82
+        <option value="Python Script" />
83
+      </list>
84
+    </option>
85
+  </component>
86
+  <component name="FindInProjectRecents">
87
+    <findStrings>
88
+      <find>enabled</find>
89
+    </findStrings>
90
+  </component>
91
+  <component name="IdeDocumentHistory">
92
+    <option name="CHANGED_PATHS">
93
+      <list>
94
+        <option value="$PROJECT_DIR$/items2.0.json" />
95
+        <option value="$PROJECT_DIR$/item3.0.json" />
96
+        <option value="$PROJECT_DIR$/www_zhipin_com/spiders/zhipin_detail_spider.py" />
97
+        <option value="$PROJECT_DIR$/www_zhipin_com/analyse/detail.py" />
98
+        <option value="$PROJECT_DIR$/www_zhipin_com/items.py" />
99
+        <option value="$PROJECT_DIR$/items.json" />
100
+        <option value="$PROJECT_DIR$/www_zhipin_com/settings.py" />
101
+        <option value="$PROJECT_DIR$/item.json" />
102
+        <option value="$PROJECT_DIR$/items_java.json" />
103
+        <option value="$PROJECT_DIR$/items_python.json" />
104
+        <option value="$PROJECT_DIR$/www_zhipin_com/spiders/zhipin_spider.py" />
105
+      </list>
106
+    </option>
107
+  </component>
108
+  <component name="JsBuildToolGruntFileManager" detection-done="true" sorting="DEFINITION_ORDER" />
109
+  <component name="JsBuildToolPackageJson" detection-done="true" sorting="DEFINITION_ORDER" />
110
+  <component name="JsGulpfileManager">
111
+    <detection-done>true</detection-done>
112
+    <sorting>DEFINITION_ORDER</sorting>
113
+  </component>
114
+  <component name="ProjectFrameBounds">
115
+    <option name="y" value="22" />
116
+    <option name="width" value="1680" />
117
+    <option name="height" value="983" />
118
+  </component>
119
+  <component name="ProjectView">
120
+    <navigator currentView="ProjectPane" proportions="" version="1">
121
+      <flattenPackages />
122
+      <showMembers />
123
+      <showModules />
124
+      <showLibraryContents />
125
+      <hideEmptyPackages />
126
+      <abbreviatePackageNames />
127
+      <autoscrollToSource />
128
+      <autoscrollFromSource />
129
+      <sortByType />
130
+      <manualOrder />
131
+      <foldersAlwaysOnTop value="true" />
132
+    </navigator>
133
+    <panes>
134
+      <pane id="ProjectPane">
135
+        <subPane>
136
+          <expand>
137
+            <path>
138
+              <item name="www_zhipin_com" type="b2602c69:ProjectViewProjectNode" />
139
+              <item name="www_zhipin_com" type="462c0819:PsiDirectoryNode" />
140
+            </path>
141
+            <path>
142
+              <item name="www_zhipin_com" type="b2602c69:ProjectViewProjectNode" />
143
+              <item name="www_zhipin_com" type="462c0819:PsiDirectoryNode" />
144
+              <item name="www_zhipin_com" type="462c0819:PsiDirectoryNode" />
145
+            </path>
146
+            <path>
147
+              <item name="www_zhipin_com" type="b2602c69:ProjectViewProjectNode" />
148
+              <item name="www_zhipin_com" type="462c0819:PsiDirectoryNode" />
149
+              <item name="www_zhipin_com" type="462c0819:PsiDirectoryNode" />
150
+              <item name="spiders" type="462c0819:PsiDirectoryNode" />
151
+            </path>
152
+          </expand>
153
+          <select />
154
+        </subPane>
155
+      </pane>
156
+      <pane id="Scope" />
157
+      <pane id="Scratches" />
158
+    </panes>
159
+  </component>
160
+  <component name="PropertiesComponent">
161
+    <property name="WebServerToolWindowFactoryState" value="false" />
162
+    <property name="last_opened_file_path" value="$PROJECT_DIR$" />
163
+  </component>
164
+  <component name="RunDashboard">
165
+    <option name="ruleStates">
166
+      <list>
167
+        <RuleState>
168
+          <option name="name" value="ConfigurationTypeDashboardGroupingRule" />
169
+        </RuleState>
170
+        <RuleState>
171
+          <option name="name" value="StatusDashboardGroupingRule" />
172
+        </RuleState>
173
+      </list>
174
+    </option>
175
+  </component>
176
+  <component name="RunManager">
177
+    <configuration name="detail" type="PythonConfigurationType" factoryName="Python" temporary="true">
178
+      <option name="INTERPRETER_OPTIONS" value="" />
179
+      <option name="PARENT_ENVS" value="true" />
180
+      <envs>
181
+        <env name="PYTHONUNBUFFERED" value="1" />
182
+      </envs>
183
+      <option name="SDK_HOME" value="" />
184
+      <option name="WORKING_DIRECTORY" value="$PROJECT_DIR$/www_zhipin_com/analyse" />
185
+      <option name="IS_MODULE_SDK" value="true" />
186
+      <option name="ADD_CONTENT_ROOTS" value="true" />
187
+      <option name="ADD_SOURCE_ROOTS" value="true" />
188
+      <module name="www_zhipin_com" />
189
+      <EXTENSION ID="PythonCoverageRunConfigurationExtension" enabled="false" sample_coverage="true" runner="coverage.py" />
190
+      <option name="SCRIPT_NAME" value="$PROJECT_DIR$/www_zhipin_com/analyse/detail.py" />
191
+      <option name="PARAMETERS" value="" />
192
+      <option name="SHOW_COMMAND_LINE" value="false" />
193
+      <option name="EMULATE_TERMINAL" value="false" />
194
+      <option name="MODULE_MODE" value="false" />
195
+    </configuration>
196
+    <recent_temporary>
197
+      <list size="1">
198
+        <item index="0" class="java.lang.String" itemvalue="Python.detail" />
199
+      </list>
200
+    </recent_temporary>
201
+  </component>
202
+  <component name="ShelveChangesManager" show_recycled="false">
203
+    <option name="remove_strategy" value="false" />
204
+  </component>
205
+  <component name="SvnConfiguration">
206
+    <configuration />
207
+  </component>
208
+  <component name="TaskManager">
209
+    <task active="true" id="Default" summary="Default task">
210
+      <changelist id="28855cfc-f511-4773-979a-c721c4e672b8" name="Default" comment="" />
211
+      <created>1512819227081</created>
212
+      <option name="number" value="Default" />
213
+      <option name="presentableId" value="Default" />
214
+      <updated>1512819227081</updated>
215
+    </task>
216
+    <servers />
217
+  </component>
218
+  <component name="ToolWindowManager">
219
+    <frame x="0" y="22" width="1680" height="983" extended-state="0" />
220
+    <layout>
221
+      <window_info id="TODO" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.33" sideWeight="0.5" order="6" side_tool="false" content_ui="tabs" />
222
+      <window_info id="Event Log" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.33" sideWeight="0.5" order="7" side_tool="true" content_ui="tabs" />
223
+      <window_info id="Run" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.32947975" sideWeight="0.5" order="2" side_tool="false" content_ui="tabs" />
224
+      <window_info id="Version Control" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="false" weight="0.33" sideWeight="0.5" order="7" side_tool="false" content_ui="tabs" />
225
+      <window_info id="Python Console" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.33" sideWeight="0.5" order="7" side_tool="false" content_ui="tabs" />
226
+      <window_info id="Terminal" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.33" sideWeight="0.5" order="7" side_tool="false" content_ui="tabs" />
227
+      <window_info id="Project" active="true" anchor="left" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="true" show_stripe_button="true" weight="0.25274727" sideWeight="0.5" order="0" side_tool="false" content_ui="combo" />
228
+      <window_info id="Docker" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="false" weight="0.33" sideWeight="0.5" order="7" side_tool="false" content_ui="tabs" />
229
+      <window_info id="Database" active="false" anchor="right" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.33" sideWeight="0.5" order="3" side_tool="false" content_ui="tabs" />
230
+      <window_info id="SciView" active="false" anchor="right" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.33" sideWeight="0.5" order="3" side_tool="false" content_ui="tabs" />
231
+      <window_info id="Structure" active="false" anchor="left" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.25" sideWeight="0.5" order="1" side_tool="false" content_ui="tabs" />
232
+      <window_info id="Debug" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.4" sideWeight="0.5" order="3" side_tool="false" content_ui="tabs" />
233
+      <window_info id="Favorites" active="false" anchor="left" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.33" sideWeight="0.5" order="2" side_tool="true" content_ui="tabs" />
234
+      <window_info id="Cvs" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.25" sideWeight="0.5" order="4" side_tool="false" content_ui="tabs" />
235
+      <window_info id="Message" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.33" sideWeight="0.5" order="0" side_tool="false" content_ui="tabs" />
236
+      <window_info id="Commander" active="false" anchor="right" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.4" sideWeight="0.5" order="0" side_tool="false" content_ui="tabs" />
237
+      <window_info id="Inspection" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.4" sideWeight="0.5" order="5" side_tool="false" content_ui="tabs" />
238
+      <window_info id="Hierarchy" active="false" anchor="right" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.25" sideWeight="0.5" order="2" side_tool="false" content_ui="combo" />
239
+      <window_info id="Find" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.32947975" sideWeight="0.5" order="1" side_tool="false" content_ui="tabs" />
240
+      <window_info id="Ant Build" active="false" anchor="right" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.25" sideWeight="0.5" order="1" side_tool="false" content_ui="tabs" />
241
+    </layout>
242
+  </component>
243
+  <component name="TypeScriptGeneratedFilesManager">
244
+    <option name="version" value="1" />
245
+  </component>
246
+  <component name="VcsContentAnnotationSettings">
247
+    <option name="myLimit" value="2678400000" />
248
+  </component>
249
+  <component name="XDebuggerManager">
250
+    <breakpoint-manager />
251
+    <watches-manager />
252
+  </component>
253
+  <component name="editorHistoryManager">
254
+    <entry file="file://$PROJECT_DIR$/www_zhipin_com/spiders/zhipin_spider.py">
255
+      <provider selected="true" editor-type-id="text-editor">
256
+        <state relative-caret-position="0">
257
+          <caret line="0" column="0" lean-forward="false" selection-start-line="0" selection-start-column="0" selection-end-line="0" selection-end-column="0" />
258
+          <folding>
259
+            <element signature="e#24#37#0" expanded="true" />
260
+          </folding>
261
+        </state>
262
+      </provider>
263
+    </entry>
264
+    <entry file="file://$PROJECT_DIR$/www_zhipin_com/items.py">
265
+      <provider selected="true" editor-type-id="text-editor">
266
+        <state relative-caret-position="384">
267
+          <caret line="16" column="29" lean-forward="true" selection-start-line="16" selection-start-column="29" selection-end-line="16" selection-end-column="29" />
268
+          <folding />
269
+        </state>
270
+      </provider>
271
+    </entry>
272
+    <entry file="file://$PROJECT_DIR$/www_zhipin_com/analyse/detail.py">
273
+      <provider selected="true" editor-type-id="text-editor">
274
+        <state relative-caret-position="168">
275
+          <caret line="12" column="10" lean-forward="true" selection-start-line="12" selection-start-column="10" selection-end-line="12" selection-end-column="10" />
276
+          <folding>
277
+            <element signature="e#130#155#0" expanded="true" />
278
+          </folding>
279
+        </state>
280
+      </provider>
281
+    </entry>
282
+    <entry file="file://$PROJECT_DIR$/www_zhipin_com/middlewares.py">
283
+      <provider selected="true" editor-type-id="text-editor">
284
+        <state relative-caret-position="-688">
285
+          <caret line="0" column="0" lean-forward="false" selection-start-line="0" selection-start-column="0" selection-end-line="0" selection-end-column="0" />
286
+        </state>
287
+      </provider>
288
+    </entry>
289
+    <entry file="file://$USER_HOME$/Library/Caches/PyCharm2017.3/python_stubs/-2010535109/builtins.py">
290
+      <provider selected="true" editor-type-id="text-editor">
291
+        <state relative-caret-position="274">
292
+          <caret line="488" column="4" lean-forward="false" selection-start-line="488" selection-start-column="4" selection-end-line="488" selection-end-column="4" />
293
+        </state>
294
+      </provider>
295
+    </entry>
296
+    <entry file="file://$PROJECT_DIR$/items2.0.json" />
297
+    <entry file="file://$PROJECT_DIR$/www_zhipin_com/spiders/zhipin_detail_spider.py" />
298
+    <entry file="file://$PROJECT_DIR$/www_zhipin_com/analyse/detail.py">
299
+      <provider selected="true" editor-type-id="text-editor">
300
+        <state relative-caret-position="168">
301
+          <caret line="12" column="10" lean-forward="false" selection-start-line="12" selection-start-column="10" selection-end-line="12" selection-end-column="10" />
302
+          <folding>
303
+            <element signature="e#130#155#0" expanded="true" />
304
+          </folding>
305
+        </state>
306
+      </provider>
307
+    </entry>
308
+    <entry file="file://$PROJECT_DIR$/item3.0.json" />
309
+    <entry file="file://$PROJECT_DIR$/item.json" />
310
+    <entry file="file://$PROJECT_DIR$/items_java.json">
311
+      <provider selected="true" editor-type-id="text-editor">
312
+        <state relative-caret-position="-127792">
313
+          <caret line="13" column="13" lean-forward="true" selection-start-line="13" selection-start-column="13" selection-end-line="13" selection-end-column="13" />
314
+        </state>
315
+      </provider>
316
+    </entry>
317
+    <entry file="file://$PROJECT_DIR$/items_python.json">
318
+      <provider selected="true" editor-type-id="text-editor">
319
+        <state relative-caret-position="360">
320
+          <caret line="15" column="6" lean-forward="true" selection-start-line="15" selection-start-column="6" selection-end-line="15" selection-end-column="6" />
321
+        </state>
322
+      </provider>
323
+    </entry>
324
+    <entry file="file://$PROJECT_DIR$/www_zhipin_com/items.py">
325
+      <provider selected="true" editor-type-id="text-editor">
326
+        <state relative-caret-position="360">
327
+          <caret line="15" column="33" lean-forward="false" selection-start-line="15" selection-start-column="33" selection-end-line="15" selection-end-column="33" />
328
+          <folding />
329
+        </state>
330
+      </provider>
331
+    </entry>
332
+    <entry file="file://$PROJECT_DIR$/www_zhipin_com/spiders/zhipin_spider.py">
333
+      <provider selected="true" editor-type-id="text-editor">
334
+        <state relative-caret-position="-1040">
335
+          <caret line="8" column="88" lean-forward="true" selection-start-line="7" selection-start-column="4" selection-end-line="8" selection-end-column="88" />
336
+          <folding>
337
+            <element signature="e#24#37#0" expanded="true" />
338
+          </folding>
339
+        </state>
340
+      </provider>
341
+    </entry>
342
+    <entry file="file://$PROJECT_DIR$/www_zhipin_com/settings.py">
343
+      <provider selected="true" editor-type-id="text-editor">
344
+        <state relative-caret-position="280">
345
+          <caret line="81" column="0" lean-forward="true" selection-start-line="81" selection-start-column="0" selection-end-line="81" selection-end-column="0" />
346
+          <folding />
347
+        </state>
348
+      </provider>
349
+    </entry>
350
+    <entry file="file://$PROJECT_DIR$/items.json">
351
+      <provider selected="true" editor-type-id="text-editor">
352
+        <state relative-caret-position="240">
353
+          <caret line="10" column="28" lean-forward="true" selection-start-line="10" selection-start-column="28" selection-end-line="10" selection-end-column="28" />
354
+          <folding />
355
+        </state>
356
+      </provider>
357
+    </entry>
358
+  </component>
359
+</project>

+ 12 - 0
.idea/www_zhipin_com.iml

@@ -0,0 +1,12 @@
1
+<?xml version="1.0" encoding="UTF-8"?>
2
+<module type="PYTHON_MODULE" version="4">
3
+  <component name="NewModuleRootManager">
4
+    <content url="file://$MODULE_DIR$" />
5
+    <orderEntry type="inheritedJdk" />
6
+    <orderEntry type="sourceFolder" forTests="false" />
7
+  </component>
8
+  <component name="TestRunnerService">
9
+    <option name="projectConfiguration" value="Twisted Trial" />
10
+    <option name="PROJECT_TEST_RUNNER" value="Twisted Trial" />
11
+  </component>
12
+</module>

File diff suppressed because it is too large
+ 5322 - 0
items.json


File diff suppressed because it is too large
+ 5366 - 0
items_java.json


File diff suppressed because it is too large
+ 4351 - 0
items_python.json


+ 11 - 0
scrapy.cfg

@@ -0,0 +1,11 @@
1
+# Automatically created by: scrapy startproject
2
+#
3
+# For more information about the [deploy] section see:
4
+# https://scrapyd.readthedocs.org/en/latest/deploy.html
5
+
6
+[settings]
7
+default = www_zhipin_com.settings
8
+
9
+[deploy]
10
+#url = http://localhost:6800/
11
+project = www_zhipin_com

+ 0 - 0
www_zhipin_com/__init__.py


BIN
www_zhipin_com/__pycache__/__init__.cpython-36.pyc


BIN
www_zhipin_com/__pycache__/items.cpython-36.pyc


BIN
www_zhipin_com/__pycache__/settings.cpython-36.pyc


+ 24 - 0
www_zhipin_com/analyse/detail.py

@@ -0,0 +1,24 @@
1
+"""
2
+@author: jtahstu
3
+@contact: root@jtahstu.com
4
+@site: http://www.jtahstu.com
5
+@time: 2017/12/10 00:25
6
+"""
7
+# -*- coding: utf-8 -*-
8
+from pprint import pprint
9
+
10
+import requests
11
+import time
12
+import json
13
+import sys
14
+
15
+
16
def init(path="../../item3.0.json"):
    """Load a JSON file of scraped items and print a short summary.

    Prints the number of top-level entries, then pretty-prints the entry at
    index 1 when the data holds more than one entry.

    Args:
        path: Location of the JSON file to read. Defaults to the relative
            path this script has always used, resolved against the current
            working directory.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file does not contain valid JSON.
    """
    # The context manager closes the handle even when json.load() raises.
    with open(path, "r", encoding="utf-8") as f:
        items = json.load(f)

    print(len(items))

    # Only show the sample entry when it exists, so a file with fewer than
    # two entries no longer fails with IndexError.
    if len(items) > 1:
        pprint(items[1])


if __name__ == "__main__":
    init()

+ 26 - 0
www_zhipin_com/items.py

@@ -0,0 +1,26 @@
1
+# -*- coding: utf-8 -*-
2
+
3
+# Define here the models for your scraped items
4
+#
5
+# See documentation in:
6
+# http://doc.scrapy.org/en/latest/topics/items.html
7
+
8
+import scrapy
9
+
10
+
11
class WwwZhipinComItem(scrapy.Item):
    """Scrapy item declaring the fields stored for one scraped record."""

    # Field names are part of the item's interface and are kept exactly as
    # originally declared, in the original order.
    pid = scrapy.Field()
    positionName = scrapy.Field()
    # NOTE(review): "Lables" looks like a misspelling of "Labels"; it is left
    # untouched because renaming the field would change the item's keys.
    positionLables = scrapy.Field()
    workYear = scrapy.Field()
    salary = scrapy.Field()
    city = scrapy.Field()
    education = scrapy.Field()
    companyShortName = scrapy.Field()
    industryField = scrapy.Field()
    financeStage = scrapy.Field()
    companySize = scrapy.Field()
    time = scrapy.Field()
    updated_at = scrapy.Field()

+ 56 - 0
www_zhipin_com/middlewares.py

@@ -0,0 +1,56 @@
1
+# -*- coding: utf-8 -*-
2
+
3
+# Define here the models for your spider middleware
4
+#
5
+# See documentation in:
6
+# http://doc.scrapy.org/en/latest/topics/spider-middleware.html
7
+
8
+from scrapy import signals
9
+
10
+
11
class WwwZhipinComSpiderMiddleware(object):
    """Pass-through spider middleware.

    Every hook hands back what it receives without modification; the only
    side effect is one info-level log line when a spider is opened.
    """

    @classmethod
    def from_crawler(cls, crawler):
        """Create the middleware and connect it to the spider_opened signal."""
        middleware = cls()
        crawler.signals.connect(
            middleware.spider_opened,
            signal=signals.spider_opened,
        )
        return middleware

    def process_spider_input(self, response, spider):
        """Hook for a response heading into the spider; always returns None."""
        return None

    def process_spider_output(self, response, result, spider):
        """Yield each element of ``result`` unchanged and in order."""
        for produced in result:
            yield produced

    def process_spider_exception(self, response, exception, spider):
        """Hook for an exception raised during spider processing.

        Nothing is handled here, so the method returns None.
        """
        return None

    def process_start_requests(self, start_requests, spider):
        """Yield each start request unchanged and in order."""
        for request in start_requests:
            yield request

    def spider_opened(self, spider):
        """Log the name of the spider that was just opened."""
        message = 'Spider opened: %s' % spider.name
        spider.logger.info(message)

+ 11 - 0
www_zhipin_com/pipelines.py

@@ -0,0 +1,11 @@
1
+# -*- coding: utf-8 -*-
2
+
3
+# Define your item pipelines here
4
+#
5
+# Don't forget to add your pipeline to the ITEM_PIPELINES setting
6
+# See: http://doc.scrapy.org/en/latest/topics/item-pipeline.html
7
+
8
+
9
class WwwZhipinComPipeline(object):
    """Item pipeline stage that performs no processing."""

    def process_item(self, item, spider):
        """Return ``item`` exactly as received."""
        return item

+ 96 - 0
www_zhipin_com/settings.py

@@ -0,0 +1,96 @@
1
+# -*- coding: utf-8 -*-
2
+
3
+# Scrapy settings for www_zhipin_com project
4
+#
5
+# For simplicity, this file contains only settings considered important or
6
+# commonly used. You can find more settings consulting the documentation:
7
+#
8
+#     http://doc.scrapy.org/en/latest/topics/settings.html
9
+#     http://scrapy.readthedocs.org/en/latest/topics/downloader-middleware.html
10
+#     http://scrapy.readthedocs.org/en/latest/topics/spider-middleware.html
11
+
12
+BOT_NAME = 'www_zhipin_com'
13
+
14
+SPIDER_MODULES = ['www_zhipin_com.spiders']
15
+NEWSPIDER_MODULE = 'www_zhipin_com.spiders'
16
+
17
+# Crawl responsibly by identifying yourself (and your website) on the user-agent
18
+# USER_AGENT = 'www_zhipin_com (+http://www.yourdomain.com)'
19
+
20
+# Obey robots.txt rules
21
+ROBOTSTXT_OBEY = True
22
+
23
+# Configure maximum concurrent requests performed by Scrapy (default: 16)
24
+# CONCURRENT_REQUESTS = 32
25
+
26
+# Configure a delay for requests for the same website (default: 0)
27
+# See http://scrapy.readthedocs.org/en/latest/topics/settings.html#download-delay
28
+# See also autothrottle settings and docs
29
+# DOWNLOAD_DELAY = 3
30
+# The download delay setting will honor only one of:
31
+# CONCURRENT_REQUESTS_PER_DOMAIN = 16
32
+# CONCURRENT_REQUESTS_PER_IP = 16
33
+
34
+# Cookies (enabled by default); set to False to disable them
35
+COOKIES_ENABLED = True
36
+
37
+# Disable Telnet Console (enabled by default)
38
+# TELNETCONSOLE_ENABLED = False
39
+
40
+# Override the default request headers:
41
+# DEFAULT_REQUEST_HEADERS = {
42
+#   'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
43
+#   'Accept-Language': 'en',
44
+# }
45
+
46
+# Enable or disable spider middlewares
47
+# See http://scrapy.readthedocs.org/en/latest/topics/spider-middleware.html
48
+# SPIDER_MIDDLEWARES = {
49
+#    'www_zhipin_com.middlewares.WwwZhipinComSpiderMiddleware': 543,
50
+# }
51
+
52
+# Enable or disable downloader middlewares
53
+# See http://scrapy.readthedocs.org/en/latest/topics/downloader-middleware.html
54
+# DOWNLOADER_MIDDLEWARES = {
55
+#    'www_zhipin_com.middlewares.MyCustomDownloaderMiddleware': 543,
56
+# }
57
+
58
+# Enable or disable extensions
59
+# See http://scrapy.readthedocs.org/en/latest/topics/extensions.html
60
+# EXTENSIONS = {
61
+#    'scrapy.extensions.telnet.TelnetConsole': None,
62
+# }
63
+
64
+# Configure item pipelines
65
+# See http://scrapy.readthedocs.org/en/latest/topics/item-pipeline.html
66
+# ITEM_PIPELINES = {
67
+#    'www_zhipin_com.pipelines.WwwZhipinComPipeline': 300,
68
+# }
69
+
70
+# Enable and configure the AutoThrottle extension (disabled by default)
71
+# See http://doc.scrapy.org/en/latest/topics/autothrottle.html
72
+AUTOTHROTTLE_ENABLED = True
73
+# The initial download delay
74
+AUTOTHROTTLE_START_DELAY = 5
75
+# The maximum download delay to be set in case of high latencies
76
+AUTOTHROTTLE_MAX_DELAY = 60
77
+# The average number of requests Scrapy should be sending in parallel to
78
+# each remote server
79
+# AUTOTHROTTLE_TARGET_CONCURRENCY = 1.0
80
+# Enable showing throttling stats for every response received:
81
+AUTOTHROTTLE_DEBUG = False
82
+
83
+# Enable and configure HTTP caching (disabled by default)
84
+# See http://scrapy.readthedocs.org/en/latest/topics/downloader-middleware.html#httpcache-middleware-settings
85
+# HTTPCACHE_ENABLED = True
86
+# HTTPCACHE_EXPIRATION_SECS = 0
87
+# HTTPCACHE_DIR = 'httpcache'
88
+# HTTPCACHE_IGNORE_HTTP_CODES = []
89
+# HTTPCACHE_STORAGE = 'scrapy.extensions.httpcache.FilesystemCacheStorage'
90
+
91
+FEED_EXPORT_ENCODING = 'utf-8'
92
+# CLOSESPIDER_ITEMCOUNT = 300
93
+
94
+# scrapy crawl fast -s CLOSESPIDER_ITEMCOUNT=10
95
+# scrapy crawl fast -s CLOSESPIDER_PAGECOUNT=10
96
+# scrapy crawl fast -s CLOSESPIDER_TIMEOUT=10

+ 4 - 0
www_zhipin_com/spiders/__init__.py

@@ -0,0 +1,4 @@
1
+# This package will contain the spiders of your Scrapy project
2
+#
3
+# Please refer to the documentation for information on how to create and manage
4
+# your spiders.

BIN
www_zhipin_com/spiders/__pycache__/__init__.cpython-36.pyc


BIN
www_zhipin_com/spiders/__pycache__/zhipin_spider.cpython-36.pyc


File diff suppressed because it is too large
+ 74 - 0
www_zhipin_com/spiders/zhipin_spider.py