Let's look at the Core API page of the manual, which explains that there is a set of default settings. These settings can be overridden when invoking Scrapy from the console, in the project's settings.py file, or in a specific spider (in the spiders folder). But how do you change parameters such as the proxy from the shell? It is not obvious at first glance, so here I print out the default settings files together with fragments of the "Core API" and "Requests and Responses" documentation.
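For the proxy question in particular, the usual route is HttpProxyMiddleware, which reads request.meta['proxy'] (or the http_proxy environment variable). A minimal sketch of a shell session, assuming a placeholder proxy at http://127.0.0.1:8888:
In []:
# inside `scrapy shell` -- hypothetical session
from scrapy.http import Request

req = Request('http://example.com',
              meta={'proxy': 'http://127.0.0.1:8888'})  # placeholder proxy URL
fetch(req)  # the shell's fetch() also accepts a Request object;
            # HttpProxyMiddleware (priority 750 below) applies meta['proxy']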
In [5]:
!dir C:\Users\kiss\Anaconda\Lib\site-packages\scrapy\settings
In [1]:
%load C:\\Users\\kiss\\Anaconda\\Lib\\site-packages\\scrapy\\settings\\__init__.py
In []:
import json

from . import default_settings


class Settings(object):

    def __init__(self, values=None):
        self.values = values.copy() if values else {}
        self.global_defaults = default_settings

    def __getitem__(self, opt_name):
        if opt_name in self.values:
            return self.values[opt_name]
        return getattr(self.global_defaults, opt_name, None)

    def get(self, name, default=None):
        return self[name] if self[name] is not None else default

    def getbool(self, name, default=False):
        """
        True is: 1, '1', True
        False is: 0, '0', False, None
        """
        return bool(int(self.get(name, default)))

    def getint(self, name, default=0):
        return int(self.get(name, default))

    def getfloat(self, name, default=0.0):
        return float(self.get(name, default))

    def getlist(self, name, default=None):
        value = self.get(name)
        if value is None:
            return default or []
        elif hasattr(value, '__iter__'):
            return value
        else:
            return str(value).split(',')

    def getdict(self, name, default=None):
        value = self.get(name)
        if value is None:
            return default or {}
        if isinstance(value, basestring):
            value = json.loads(value)
        if isinstance(value, dict):
            return value
        raise ValueError("Cannot convert value for setting '%s' to dict: '%s'" % (name, value))


class CrawlerSettings(Settings):

    def __init__(self, settings_module=None, **kw):
        super(CrawlerSettings, self).__init__(**kw)
        self.settings_module = settings_module
        self.overrides = {}
        self.defaults = {}

    def __getitem__(self, opt_name):
        if opt_name in self.overrides:
            return self.overrides[opt_name]
        if self.settings_module and hasattr(self.settings_module, opt_name):
            return getattr(self.settings_module, opt_name)
        if opt_name in self.defaults:
            return self.defaults[opt_name]
        return super(CrawlerSettings, self).__getitem__(opt_name)

    def __str__(self):
        return "<CrawlerSettings module=%r>" % self.settings_module


def iter_default_settings():
    """Return the default settings as an iterator of (name, value) tuples"""
    for name in dir(default_settings):
        if name.isupper():
            yield name, getattr(default_settings, name)


def overridden_settings(settings):
    """Return a dict of the settings that have been overridden"""
    for name, defvalue in iter_default_settings():
        value = settings[name]
        if not isinstance(defvalue, dict) and value != defvalue:
            yield name, value
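Before moving on to the defaults themselves, the lookup order above is easy to verify interactively. A minimal sketch (values are arbitrary; the option names are the built-in ones listed below):
In []:
from scrapy.settings import Settings

s = Settings({'DOWNLOAD_DELAY': '2.5'})   # explicit values shadow default_settings
print(s.getfloat('DOWNLOAD_DELAY'))       # 2.5, converted from the string
print(s.getbool('COOKIES_ENABLED'))       # True, falls through to default_settings
print(s.getlist('RETRY_HTTP_CODES'))      # [500, 502, 503, 504, 400, 408]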
In [2]:
%load C:\\Users\\kiss\\Anaconda\\Lib\\site-packages\\scrapy\\settings\\default_settings.py
In []:
"""
This module contains the default values for all settings used by Scrapy.
For more information about these settings you can read the settings
documentation in docs/topics/settings.rst
Scrapy developers, if you add a setting here remember to:
* add it in alphabetical order
* group similar settings without leaving blank lines
* add its documentation to the available settings documentation
(docs/topics/settings.rst)
"""
import os
import sys
from importlib import import_module
from os.path import join, abspath, dirname
BOT_NAME = 'scrapybot'
CLOSESPIDER_TIMEOUT = 0
CLOSESPIDER_PAGECOUNT = 0
CLOSESPIDER_ITEMCOUNT = 0
CLOSESPIDER_ERRORCOUNT = 0
COMMANDS_MODULE = ''
COMPRESSION_ENABLED = True
CONCURRENT_ITEMS = 100
CONCURRENT_REQUESTS = 16
CONCURRENT_REQUESTS_PER_DOMAIN = 8
CONCURRENT_REQUESTS_PER_IP = 0
COOKIES_ENABLED = True
COOKIES_DEBUG = False
DEFAULT_ITEM_CLASS = 'scrapy.item.Item'
DEFAULT_REQUEST_HEADERS = {
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
    'Accept-Language': 'en',
}
DEPTH_LIMIT = 0
DEPTH_STATS = True
DEPTH_PRIORITY = 0
DNSCACHE_ENABLED = True
DOWNLOAD_DELAY = 0
DOWNLOAD_HANDLERS = {}
DOWNLOAD_HANDLERS_BASE = {
    'file': 'scrapy.core.downloader.handlers.file.FileDownloadHandler',
    'http': 'scrapy.core.downloader.handlers.http.HTTPDownloadHandler',
    'https': 'scrapy.core.downloader.handlers.http.HTTPDownloadHandler',
    's3': 'scrapy.core.downloader.handlers.s3.S3DownloadHandler',
    'ftp': 'scrapy.core.downloader.handlers.ftp.FTPDownloadHandler',
}
DOWNLOAD_TIMEOUT = 180 # 3mins
DOWNLOADER_DEBUG = False
DOWNLOADER_HTTPCLIENTFACTORY = 'scrapy.core.downloader.webclient.ScrapyHTTPClientFactory'
DOWNLOADER_CLIENTCONTEXTFACTORY = 'scrapy.core.downloader.contextfactory.ScrapyClientContextFactory'
DOWNLOADER_MIDDLEWARES = {}
DOWNLOADER_MIDDLEWARES_BASE = {
    # Engine side
    'scrapy.contrib.downloadermiddleware.robotstxt.RobotsTxtMiddleware': 100,
    'scrapy.contrib.downloadermiddleware.httpauth.HttpAuthMiddleware': 300,
    'scrapy.contrib.downloadermiddleware.downloadtimeout.DownloadTimeoutMiddleware': 350,
    'scrapy.contrib.downloadermiddleware.useragent.UserAgentMiddleware': 400,
    'scrapy.contrib.downloadermiddleware.retry.RetryMiddleware': 500,
    'scrapy.contrib.downloadermiddleware.defaultheaders.DefaultHeadersMiddleware': 550,
    'scrapy.contrib.downloadermiddleware.redirect.MetaRefreshMiddleware': 580,
    'scrapy.contrib.downloadermiddleware.httpcompression.HttpCompressionMiddleware': 590,
    'scrapy.contrib.downloadermiddleware.redirect.RedirectMiddleware': 600,
    'scrapy.contrib.downloadermiddleware.cookies.CookiesMiddleware': 700,
    'scrapy.contrib.downloadermiddleware.httpproxy.HttpProxyMiddleware': 750,
    'scrapy.contrib.downloadermiddleware.chunked.ChunkedTransferMiddleware': 830,
    'scrapy.contrib.downloadermiddleware.stats.DownloaderStats': 850,
    'scrapy.contrib.downloadermiddleware.httpcache.HttpCacheMiddleware': 900,
    # Downloader side
}
DOWNLOADER_STATS = True
DUPEFILTER_CLASS = 'scrapy.dupefilter.RFPDupeFilter'
try:
    EDITOR = os.environ['EDITOR']
except KeyError:
    if sys.platform == 'win32':
        EDITOR = '%s -m idlelib.idle'
    else:
        EDITOR = 'vi'
EXTENSIONS = {}
EXTENSIONS_BASE = {
    'scrapy.contrib.corestats.CoreStats': 0,
    'scrapy.webservice.WebService': 0,
    'scrapy.telnet.TelnetConsole': 0,
    'scrapy.contrib.memusage.MemoryUsage': 0,
    'scrapy.contrib.memdebug.MemoryDebugger': 0,
    'scrapy.contrib.closespider.CloseSpider': 0,
    'scrapy.contrib.feedexport.FeedExporter': 0,
    'scrapy.contrib.logstats.LogStats': 0,
    'scrapy.contrib.spiderstate.SpiderState': 0,
    'scrapy.contrib.throttle.AutoThrottle': 0,
}
FEED_URI = None
FEED_URI_PARAMS = None # a function to extend uri arguments
FEED_FORMAT = 'jsonlines'
FEED_STORE_EMPTY = False
FEED_STORAGES = {}
FEED_STORAGES_BASE = {
    '': 'scrapy.contrib.feedexport.FileFeedStorage',
    'file': 'scrapy.contrib.feedexport.FileFeedStorage',
    'stdout': 'scrapy.contrib.feedexport.StdoutFeedStorage',
    's3': 'scrapy.contrib.feedexport.S3FeedStorage',
    'ftp': 'scrapy.contrib.feedexport.FTPFeedStorage',
}
FEED_EXPORTERS = {}
FEED_EXPORTERS_BASE = {
    'json': 'scrapy.contrib.exporter.JsonItemExporter',
    'jsonlines': 'scrapy.contrib.exporter.JsonLinesItemExporter',
    'csv': 'scrapy.contrib.exporter.CsvItemExporter',
    'xml': 'scrapy.contrib.exporter.XmlItemExporter',
    'marshal': 'scrapy.contrib.exporter.MarshalItemExporter',
    'pickle': 'scrapy.contrib.exporter.PickleItemExporter',
}
HTTPCACHE_ENABLED = False
HTTPCACHE_DIR = 'httpcache'
HTTPCACHE_IGNORE_MISSING = False
HTTPCACHE_STORAGE = 'scrapy.contrib.httpcache.DbmCacheStorage'
HTTPCACHE_EXPIRATION_SECS = 0
HTTPCACHE_IGNORE_HTTP_CODES = []
HTTPCACHE_IGNORE_SCHEMES = ['file']
HTTPCACHE_DBM_MODULE = 'anydbm'
HTTPCACHE_POLICY = 'scrapy.contrib.httpcache.DummyPolicy'
ITEM_PROCESSOR = 'scrapy.contrib.pipeline.ItemPipelineManager'
ITEM_PIPELINES = {}
ITEM_PIPELINES_BASE = {}
LOG_ENABLED = True
LOG_ENCODING = 'utf-8'
LOG_FORMATTER = 'scrapy.logformatter.LogFormatter'
LOG_STDOUT = False
LOG_LEVEL = 'DEBUG'
LOG_FILE = None
LOG_UNSERIALIZABLE_REQUESTS = False
LOGSTATS_INTERVAL = 60.0
MAIL_DEBUG = False
MAIL_HOST = 'localhost'
MAIL_PORT = 25
MAIL_FROM = 'scrapy@localhost'
MAIL_PASS = None
MAIL_USER = None
MEMDEBUG_ENABLED = False # enable memory debugging
MEMDEBUG_NOTIFY = [] # send memory debugging report by mail at engine shutdown
MEMUSAGE_ENABLED = False
MEMUSAGE_LIMIT_MB = 0
MEMUSAGE_NOTIFY_MAIL = []
MEMUSAGE_REPORT = False
MEMUSAGE_WARNING_MB = 0
METAREFRESH_ENABLED = True
METAREFRESH_MAXDELAY = 100
NEWSPIDER_MODULE = ''
RANDOMIZE_DOWNLOAD_DELAY = True
REDIRECT_ENABLED = True
REDIRECT_MAX_TIMES = 20 # uses Firefox default setting
REDIRECT_PRIORITY_ADJUST = +2
REFERER_ENABLED = True
RETRY_ENABLED = True
RETRY_TIMES = 2 # initial response + 2 retries = 3 requests
RETRY_HTTP_CODES = [500, 502, 503, 504, 400, 408]
RETRY_PRIORITY_ADJUST = -1
ROBOTSTXT_OBEY = False
SCHEDULER = 'scrapy.core.scheduler.Scheduler'
SCHEDULER_DISK_QUEUE = 'scrapy.squeue.PickleLifoDiskQueue'
SCHEDULER_MEMORY_QUEUE = 'scrapy.squeue.LifoMemoryQueue'
SPIDER_MANAGER_CLASS = 'scrapy.spidermanager.SpiderManager'
SPIDER_MIDDLEWARES = {}
SPIDER_MIDDLEWARES_BASE = {
    # Engine side
    'scrapy.contrib.spidermiddleware.httperror.HttpErrorMiddleware': 50,
    'scrapy.contrib.spidermiddleware.offsite.OffsiteMiddleware': 500,
    'scrapy.contrib.spidermiddleware.referer.RefererMiddleware': 700,
    'scrapy.contrib.spidermiddleware.urllength.UrlLengthMiddleware': 800,
    'scrapy.contrib.spidermiddleware.depth.DepthMiddleware': 900,
    # Spider side
}
SPIDER_MODULES = []
STATS_CLASS = 'scrapy.statscol.MemoryStatsCollector'
STATS_DUMP = True
STATSMAILER_RCPTS = []
TEMPLATES_DIR = abspath(join(dirname(__file__), '..', 'templates'))
URLLENGTH_LIMIT = 2083
USER_AGENT = 'Scrapy/%s (+http://scrapy.org)' % import_module('scrapy').__version__
TELNETCONSOLE_ENABLED = 1
TELNETCONSOLE_PORT = [6023, 6073]
TELNETCONSOLE_HOST = '0.0.0.0'
WEBSERVICE_ENABLED = True
WEBSERVICE_LOGFILE = None
WEBSERVICE_PORT = [6080, 7030]
WEBSERVICE_HOST = '0.0.0.0'
WEBSERVICE_RESOURCES = {}
WEBSERVICE_RESOURCES_BASE = {
    'scrapy.contrib.webservice.crawler.CrawlerResource': 1,
    'scrapy.contrib.webservice.enginestatus.EngineStatusResource': 1,
    'scrapy.contrib.webservice.stats.StatsResource': 1,
}
SPIDER_CONTRACTS = {}
SPIDER_CONTRACTS_BASE = {
'scrapy.contracts.default.UrlContract': 1,
'scrapy.contracts.default.ReturnsContract': 2,
'scrapy.contracts.default.ScrapesContract': 3,
}
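To override any of these defaults project-wide, the same names simply go into the project's settings.py (or into the -s command-line option). A sketch with arbitrary example values:
In []:
# fragment of a project's settings.py -- these names shadow default_settings
BOT_NAME = 'mybot'
DOWNLOAD_DELAY = 2                          # seconds between requests
USER_AGENT = 'mybot (+http://example.com)'
# the same keys can also be set per run from the console, e.g.:
#   scrapy crawl myspider -s DOWNLOAD_DELAY=5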
In [3]:
%load C:\\Users\\kiss\\Anaconda\\Lib\\site-packages\\scrapy\\settings\\deprecated.py
In []:
import warnings

from scrapy.exceptions import ScrapyDeprecationWarning

DEPRECATED_SETTINGS = [
    ('TRACK_REFS', 'no longer needed (trackref is always enabled)'),
    ('RESPONSE_CLASSES', 'no longer supported'),
    ('DEFAULT_RESPONSE_ENCODING', 'no longer supported'),
    ('BOT_VERSION', 'no longer used (user agent defaults to Scrapy now)'),
    ('ENCODING_ALIASES', 'no longer needed (encoding discovery uses w3lib now)'),
    ('STATS_ENABLED', 'no longer supported (change STATS_CLASS instead)'),
    ('SQLITE_DB', 'no longer supported'),
    ('SELECTORS_BACKEND', 'use SCRAPY_SELECTORS_BACKEND environment variable instead'),
    ('AUTOTHROTTLE_MIN_DOWNLOAD_DELAY', 'use DOWNLOAD_DELAY instead'),
    ('AUTOTHROTTLE_MAX_CONCURRENCY', 'use CONCURRENT_REQUESTS_PER_DOMAIN instead'),
    ('AUTOTHROTTLE_MAX_CONCURRENCY', 'use CONCURRENT_REQUESTS_PER_DOMAIN instead'),
    ('REDIRECT_MAX_METAREFRESH_DELAY', 'use METAREFRESH_MAXDELAY instead'),
]


def check_deprecated_settings(settings):
    deprecated = [x for x in DEPRECATED_SETTINGS if settings[x[0]] is not None]
    if deprecated:
        msg = "You are using the following settings which are deprecated or obsolete"
        msg += " (ask scrapy-users@googlegroups.com for alternatives):"
        msg = msg + "\n " + "\n ".join("%s: %s" % x for x in deprecated)
        warnings.warn(msg, ScrapyDeprecationWarning)
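As a quick illustration (a hypothetical check, using a CrawlerSettings instance like the shell's settings object), putting a deprecated name into overrides triggers the warning:
In []:
from scrapy.settings import CrawlerSettings
from scrapy.settings.deprecated import check_deprecated_settings

s = CrawlerSettings()
s.overrides['STATS_ENABLED'] = True   # deprecated: change STATS_CLASS instead
check_deprecated_settings(s)          # emits a ScrapyDeprecationWarning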
Settings can be accessed through the scrapy.crawler.Crawler.settings attribute of the Crawler that is passed to the from_crawler method in extensions and middlewares:
In [6]:
%load C:\\Users\\kiss\\Anaconda\\Lib\\site-packages\\scrapy\\crawler.py
In []:
import signal

from twisted.internet import reactor, defer

from scrapy.core.engine import ExecutionEngine
from scrapy.resolver import CachingThreadedResolver
from scrapy.extension import ExtensionManager
from scrapy.signalmanager import SignalManager
from scrapy.utils.ossignal import install_shutdown_handlers, signal_names
from scrapy.utils.misc import load_object
from scrapy import log, signals


class Crawler(object):

    def __init__(self, settings):
        self.configured = False
        self.settings = settings
        self.signals = SignalManager(self)
        self.stats = load_object(settings['STATS_CLASS'])(self)
        self._start_requests = lambda: ()
        self._spider = None
        # TODO: move SpiderManager to CrawlerProcess
        spman_cls = load_object(self.settings['SPIDER_MANAGER_CLASS'])
        self.spiders = spman_cls.from_crawler(self)

    def install(self):
        # TODO: remove together with scrapy.project.crawler usage
        import scrapy.project
        assert not hasattr(scrapy.project, 'crawler'), "crawler already installed"
        scrapy.project.crawler = self

    def uninstall(self):
        # TODO: remove together with scrapy.project.crawler usage
        import scrapy.project
        assert hasattr(scrapy.project, 'crawler'), "crawler not installed"
        del scrapy.project.crawler

    def configure(self):
        if self.configured:
            return
        self.configured = True
        lf_cls = load_object(self.settings['LOG_FORMATTER'])
        self.logformatter = lf_cls.from_crawler(self)
        self.extensions = ExtensionManager.from_crawler(self)
        self.engine = ExecutionEngine(self, self._spider_closed)

    def crawl(self, spider, requests=None):
        assert self._spider is None, 'Spider already attached'
        self._spider = spider
        spider.set_crawler(self)
        if requests is None:
            self._start_requests = spider.start_requests
        else:
            self._start_requests = lambda: requests

    def _spider_closed(self, spider=None):
        if not self.engine.open_spiders:
            self.stop()

    @defer.inlineCallbacks
    def start(self):
        yield defer.maybeDeferred(self.configure)
        if self._spider:
            yield self.engine.open_spider(self._spider, self._start_requests())
        yield defer.maybeDeferred(self.engine.start)

    @defer.inlineCallbacks
    def stop(self):
        if self.configured and self.engine.running:
            yield defer.maybeDeferred(self.engine.stop)


class CrawlerProcess(object):
    """ A class to run multiple scrapy crawlers in a process sequentially"""

    def __init__(self, settings):
        install_shutdown_handlers(self._signal_shutdown)
        self.settings = settings
        self.crawlers = {}
        self.stopping = False
        self._started = None

    def create_crawler(self, name=None):
        if name not in self.crawlers:
            self.crawlers[name] = Crawler(self.settings)
        return self.crawlers[name]

    def start(self):
        if self.start_crawling():
            self.start_reactor()

    @defer.inlineCallbacks
    def stop(self):
        self.stopping = True
        if self._active_crawler:
            yield self._active_crawler.stop()

    def _signal_shutdown(self, signum, _):
        install_shutdown_handlers(self._signal_kill)
        signame = signal_names[signum]
        log.msg(format="Received %(signame)s, shutting down gracefully. Send again to force ",
                level=log.INFO, signame=signame)
        reactor.callFromThread(self.stop)

    def _signal_kill(self, signum, _):
        install_shutdown_handlers(signal.SIG_IGN)
        signame = signal_names[signum]
        log.msg(format='Received %(signame)s twice, forcing unclean shutdown',
                level=log.INFO, signame=signame)
        reactor.callFromThread(self._stop_reactor)

    # ------------------------------------------------------------------------#
    # The following public methods can't be considered stable and may change at
    # any moment.
    #
    # start_crawling and start_reactor are called from scrapy.commands.shell
    # They are splitted because reactor is started on a different thread than IPython shell.
    #
    def start_crawling(self):
        log.scrapy_info(self.settings)
        return self._start_crawler() is not None

    def start_reactor(self):
        if self.settings.getbool('DNSCACHE_ENABLED'):
            reactor.installResolver(CachingThreadedResolver(reactor))
        reactor.addSystemEventTrigger('before', 'shutdown', self.stop)
        reactor.run(installSignalHandlers=False)  # blocking call

    def _start_crawler(self):
        if not self.crawlers or self.stopping:
            return
        name, crawler = self.crawlers.popitem()
        self._active_crawler = crawler
        sflo = log.start_from_crawler(crawler)
        crawler.configure()
        crawler.install()
        crawler.signals.connect(crawler.uninstall, signals.engine_stopped)
        if sflo:
            crawler.signals.connect(sflo.stop, signals.engine_stopped)
        crawler.signals.connect(self._check_done, signals.engine_stopped)
        crawler.start()
        return name, crawler

    def _check_done(self, **kwargs):
        if not self._start_crawler():
            self._stop_reactor()

    def _stop_reactor(self, _=None):
        try:
            reactor.stop()
        except RuntimeError:  # raised if already stopped or in shutdown stage
            pass
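To tie this back to the quote above crawler.py: any extension or middleware receives the Crawler in from_crawler and can read settings from crawler.settings. A minimal sketch (the class and the MYEXT_ENABLED name are made up for illustration):
In []:
from scrapy.exceptions import NotConfigured

class DelayReporter(object):                 # hypothetical extension
    def __init__(self, delay):
        self.delay = delay

    @classmethod
    def from_crawler(cls, crawler):
        # crawler.settings is the (Crawler)Settings object described above
        if not crawler.settings.getbool('MYEXT_ENABLED'):   # made-up setting name
            raise NotConfigured
        return cls(crawler.settings.getfloat('DOWNLOAD_DELAY'))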
When calling from the shell
In []:
In [24]: help(settings)
Help on CrawlerSettings in module scrapy.settings object:
class CrawlerSettings(Settings)
| Method resolution order:
| CrawlerSettings
| Settings
| __builtin__.object
|
| Methods defined here:
|
| __getitem__(self, opt_name)
|
| __init__(self, settings_module=None, **kw)
|
| __str__(self)
|
| ----------------------------------------------------------------------
| Methods inherited from Settings:
|
| get(self, name, default=None)
|
| getbool(self, name, default=False)
| True is: 1, '1', True
| False is: 0, '0', False, None
|
| getdict(self, name, default=None)
|
| getfloat(self, name, default=0.0)
|
| getint(self, name, default=0)
|
| getlist(self, name, default=None)
|
| ----------------------------------------------------------------------
| Data descriptors inherited from Settings:
|
| __dict__
| dictionary for instance variables (if defined)
|
| __weakref__
| list of weak references to the object (if defined)
In [25]:
In []:
In [22]: settings.
settings.defaults settings.getdict settings.getlist settings.settings_module
settings.get settings.getfloat settings.global_defaults settings.values
settings.getbool settings.getint settings.overrides
In []:
In [26]: settings.global_defaults.
settings.global_defaults.BOT_NAME settings.global_defaults.LOG_STDOUT
settings.global_defaults.CLOSESPIDER_ERRORCOUNT settings.global_defaults.LOG_UNSERIALIZABLE_REQUESTS
settings.global_defaults.CLOSESPIDER_ITEMCOUNT settings.global_defaults.MAIL_DEBUG
settings.global_defaults.CLOSESPIDER_PAGECOUNT settings.global_defaults.MAIL_FROM
settings.global_defaults.CLOSESPIDER_TIMEOUT settings.global_defaults.MAIL_HOST
settings.global_defaults.COMMANDS_MODULE settings.global_defaults.MAIL_PASS
settings.global_defaults.COMPRESSION_ENABLED settings.global_defaults.MAIL_PORT
settings.global_defaults.CONCURRENT_ITEMS settings.global_defaults.MAIL_USER
settings.global_defaults.CONCURRENT_REQUESTS settings.global_defaults.MEMDEBUG_ENABLED
settings.global_defaults.CONCURRENT_REQUESTS_PER_DOMAIN settings.global_defaults.MEMDEBUG_NOTIFY
settings.global_defaults.CONCURRENT_REQUESTS_PER_IP settings.global_defaults.MEMUSAGE_ENABLED
settings.global_defaults.COOKIES_DEBUG settings.global_defaults.MEMUSAGE_LIMIT_MB
settings.global_defaults.COOKIES_ENABLED settings.global_defaults.MEMUSAGE_NOTIFY_MAIL
settings.global_defaults.DEFAULT_ITEM_CLASS settings.global_defaults.MEMUSAGE_REPORT
settings.global_defaults.DEFAULT_REQUEST_HEADERS settings.global_defaults.MEMUSAGE_WARNING_MB
settings.global_defaults.DEPTH_LIMIT settings.global_defaults.METAREFRESH_ENABLED
settings.global_defaults.DEPTH_PRIORITY settings.global_defaults.METAREFRESH_MAXDELAY
settings.global_defaults.DEPTH_STATS settings.global_defaults.NEWSPIDER_MODULE
settings.global_defaults.DNSCACHE_ENABLED settings.global_defaults.RANDOMIZE_DOWNLOAD_DELAY
settings.global_defaults.DOWNLOADER_CLIENTCONTEXTFACTORY settings.global_defaults.REDIRECT_ENABLED
settings.global_defaults.DOWNLOADER_DEBUG settings.global_defaults.REDIRECT_MAX_TIMES
settings.global_defaults.DOWNLOADER_HTTPCLIENTFACTORY settings.global_defaults.REDIRECT_PRIORITY_ADJUST
settings.global_defaults.DOWNLOADER_MIDDLEWARES settings.global_defaults.REFERER_ENABLED
settings.global_defaults.DOWNLOADER_MIDDLEWARES_BASE settings.global_defaults.RETRY_ENABLED
settings.global_defaults.DOWNLOADER_STATS settings.global_defaults.RETRY_HTTP_CODES
settings.global_defaults.DOWNLOAD_DELAY settings.global_defaults.RETRY_PRIORITY_ADJUST
settings.global_defaults.DOWNLOAD_HANDLERS settings.global_defaults.RETRY_TIMES
settings.global_defaults.DOWNLOAD_HANDLERS_BASE settings.global_defaults.ROBOTSTXT_OBEY
settings.global_defaults.DOWNLOAD_TIMEOUT settings.global_defaults.SCHEDULER
settings.global_defaults.DUPEFILTER_CLASS settings.global_defaults.SCHEDULER_DISK_QUEUE
settings.global_defaults.EDITOR settings.global_defaults.SCHEDULER_MEMORY_QUEUE
settings.global_defaults.EXTENSIONS settings.global_defaults.SPIDER_CONTRACTS
settings.global_defaults.EXTENSIONS_BASE settings.global_defaults.SPIDER_CONTRACTS_BASE
settings.global_defaults.FEED_EXPORTERS settings.global_defaults.SPIDER_MANAGER_CLASS
settings.global_defaults.FEED_EXPORTERS_BASE settings.global_defaults.SPIDER_MIDDLEWARES
settings.global_defaults.FEED_FORMAT settings.global_defaults.SPIDER_MIDDLEWARES_BASE
settings.global_defaults.FEED_STORAGES settings.global_defaults.SPIDER_MODULES
settings.global_defaults.FEED_STORAGES_BASE settings.global_defaults.STATSMAILER_RCPTS
settings.global_defaults.FEED_STORE_EMPTY settings.global_defaults.STATS_CLASS
settings.global_defaults.FEED_URI settings.global_defaults.STATS_DUMP
settings.global_defaults.FEED_URI_PARAMS settings.global_defaults.TELNETCONSOLE_ENABLED
settings.global_defaults.HTTPCACHE_DBM_MODULE settings.global_defaults.TELNETCONSOLE_HOST
settings.global_defaults.HTTPCACHE_DIR settings.global_defaults.TELNETCONSOLE_PORT
settings.global_defaults.HTTPCACHE_ENABLED settings.global_defaults.TEMPLATES_DIR
settings.global_defaults.HTTPCACHE_EXPIRATION_SECS settings.global_defaults.URLLENGTH_LIMIT
settings.global_defaults.HTTPCACHE_IGNORE_HTTP_CODES settings.global_defaults.USER_AGENT
settings.global_defaults.HTTPCACHE_IGNORE_MISSING settings.global_defaults.WEBSERVICE_ENABLED
settings.global_defaults.HTTPCACHE_IGNORE_SCHEMES settings.global_defaults.WEBSERVICE_HOST
settings.global_defaults.HTTPCACHE_POLICY settings.global_defaults.WEBSERVICE_LOGFILE
settings.global_defaults.HTTPCACHE_STORAGE settings.global_defaults.WEBSERVICE_PORT
settings.global_defaults.ITEM_PIPELINES settings.global_defaults.WEBSERVICE_RESOURCES
settings.global_defaults.ITEM_PIPELINES_BASE settings.global_defaults.WEBSERVICE_RESOURCES_BASE
settings.global_defaults.ITEM_PROCESSOR settings.global_defaults.abspath
settings.global_defaults.LOGSTATS_INTERVAL settings.global_defaults.dirname
settings.global_defaults.LOG_ENABLED settings.global_defaults.import_module
settings.global_defaults.LOG_ENCODING settings.global_defaults.join
settings.global_defaults.LOG_FILE settings.global_defaults.os
settings.global_defaults.LOG_FORMATTER settings.global_defaults.sys
settings.global_defaults.LOG_LEVEL
In [26]: settings.global_defaults.
In []:
In [29]: settings.global_defaults.USER_AGENT
Out[29]: 'Scrapy/0.20.1 (+http://scrapy.org)'
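Since this shell settings object is a CrawlerSettings, and its __getitem__ (printed above) consults overrides first, one way to change a value from the shell is to write into settings.overrides. A hypothetical continuation of the session (the user agent string is just an example; components that already cached the old value at startup will not see the change):
In []:
In [30]: settings.overrides['USER_AGENT'] = 'Mozilla/5.0 (example)'

In [31]: settings['USER_AGENT']
Out[31]: 'Mozilla/5.0 (example)'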
This object stores Scrapy settings for the configuration of internal components, and can be used for any further customization.
After instantiation of this class, the new object will have the global default settings described on Built-in settings reference already populated.
Additional values can be passed on initialization with the values argument, and they will take the priority level given by the priority argument. If the latter argument is a string, the priority name will be looked up in SETTINGS_PRIORITIES; otherwise, a specific integer should be provided.
Once the object is created, new settings can be loaded or updated with the set() method, and can be accessed with the square bracket notation of dictionaries, or with the get() method of the instance and its value conversion variants. When requesting a stored key, the value with the highest priority will be retrieved.
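The values/priority and set() mechanics described in this quote belong to newer Scrapy releases than the 0.20 sources printed above. A hedged sketch of that newer API, following the quoted documentation:
In []:
from scrapy.settings import Settings

s = Settings()
s.set('DOWNLOAD_DELAY', 2, priority='cmdline')   # priority name looked up in SETTINGS_PRIORITIES
s.set('DOWNLOAD_DELAY', 5, priority='project')   # lower priority, so it does not win
print(s.getint('DOWNLOAD_DELAY'))                # 2 -- the highest-priority value is returned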