from __future__ import absolute_import, division, unicode_literals
from future.builtins import str
from future.backports import urllib
from future.backports.urllib import parse as _parse, request as _request
urllib.parse = _parse
urllib.request = _request

__all__ = ["RobotFileParser"]


class RobotFileParser(object):
    """ This class provides a set of methods to read, parse and answer
    questions about a single robots.txt file.

    """

    def __init__(self, url=''):
        self.entries = []
        self.default_entry = None
        self.disallow_all = False
        self.allow_all = False
        self.set_url(url)
        self.last_checked = 0

    def mtime(self):
        """Returns the time the robots.txt file was last fetched.

        This is useful for long-running web spiders that need to
        check for new robots.txt files periodically.

        """
        return self.last_checked

    def modified(self):
        """Sets the time the robots.txt file was last fetched to the
        current time.

        """
        import time
        self.last_checked = time.time()

    def set_url(self, url):
        """Sets the URL referring to a robots.txt file."""
        self.url = url
        self.host, self.path = urllib.parse.urlparse(url)[1:3]

    def read(self):
        """Reads the robots.txt URL and feeds it to the parser."""
        try:
            f = urllib.request.urlopen(self.url)
        except urllib.error.HTTPError as err:
            if err.code in (401, 403):
                self.disallow_all = True
            elif err.code >= 400:
                self.allow_all = True
        else:
            raw = f.read()
            self.parse(raw.decode("utf-8").splitlines())

    def _add_entry(self, entry):
        if "*" in entry.useragents:
            # the default entry is considered last
            if self.default_entry is None:
                # the first default entry wins
                self.default_entry = entry
        else:
            self.entries.append(entry)

    def parse(self, lines):
        """Parse the input lines from a robots.txt file.

        We allow that a user-agent: line is not preceded by
        one or more blank lines.
        """
        # states:
        #   0: start state
        #   1: saw user-agent line
        #   2: saw an allow or disallow line
        state = 0
        entry = Entry()

        for line in lines:
            if not line:
                if state == 1:
                    entry = Entry()
                    state = 0
                elif state == 2:
                    self._add_entry(entry)
                    entry = Entry()
                    state = 0
            # remove optional comment and strip line
            i = line.find('#')
            if i >= 0:
                line = line[:i]
            line = line.strip()
            if not line:
                continue
            line = line.split(':', 1)
            if len(line) == 2:
                line[0] = line[0].strip().lower()
                line[1] = urllib.parse.unquote(line[1].strip())
                if line[0] == "user-agent":
                    if state == 2:
                        self._add_entry(entry)
                        entry = Entry()
                    entry.useragents.append(line[1])
                    state = 1
                elif line[0] == "disallow":
                    if state != 0:
                        entry.rulelines.append(RuleLine(line[1], False))
                        state = 2
                elif line[0] == "allow":
                    if state != 0:
                        entry.rulelines.append(RuleLine(line[1], True))
                        state = 2
        if state == 2:
            self._add_entry(entry)

    def can_fetch(self, useragent, url):
        """using the parsed robots.txt decide if useragent can fetch url"""
        if self.disallow_all:
            return False
        if self.allow_all:
            return True
        # search for given user agent matches
        # the first match counts
        parsed_url = urllib.parse.urlparse(urllib.parse.unquote(url))
        url = urllib.parse.urlunparse(('', '', parsed_url.path,
                                       parsed_url.params, parsed_url.query,
                                       parsed_url.fragment))
        url = urllib.parse.quote(url)
        if not url:
            url = "/"
        for entry in self.entries:
            if entry.applies_to(useragent):
                return entry.allowance(url)
        # try the default entry last
        if self.default_entry:
            return self.default_entry.allowance(url)
        # agent not found ==> access granted
        return True

    def __str__(self):
        return ''.join([str(entry) + "\n" for entry in self.entries])


class RuleLine(object):
    """A rule line is a single "Allow:" (allowance==True) or "Disallow:"
       (allowance==False) followed by a path."""

    def __init__(self, path, allowance):
        if path == '' and not allowance:
            # an empty value means allow all
            allowance = True
        self.path = urllib.parse.quote(path)
        self.allowance = allowance

    def applies_to(self, filename):
        return self.path == "*" or filename.startswith(self.path)

    def __str__(self):
        return (self.allowance and "Allow" or "Disallow") + ": " + self.path


class Entry(object):
    """An entry has one or more user-agents and zero or more rulelines"""

    def __init__(self):
        self.useragents = []
        self.rulelines = []

    def __str__(self):
        ret = []
        for agent in self.useragents:
            ret.extend(["User-agent: ", agent, "\n"])
        for line in self.rulelines:
            ret.extend([str(line), "\n"])
        return ''.join(ret)

    def applies_to(self, useragent):
        """check if this entry applies to the specified agent"""
        # split the name token and make it lower case
        useragent = useragent.split("/")[0].lower()
        for agent in self.useragents:
            if agent == '*':
                # we have the catch-all agent
                return True
            agent = agent.lower()
            if agent in useragent:
                return True
        return False

    def allowance(self, filename):
        """Preconditions:
        - our agent applies to this entry
        - filename is URL decoded"""
        for line in self.rulelines:
            if line.applies_to(filename):
                return line.allowance
        return True
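
# ---------------------------------------------------------------------------
# Minimal usage sketch. The robots.txt body is fed directly to parse() so the
# example runs offline; a real crawler would call set_url() and read() to
# fetch the live file first. The agent name and paths below are hypothetical.
# Rule matching in RuleLine is first-match-wins prefix matching, so the more
# specific "Allow" line must precede the broader "Disallow" line.
# ---------------------------------------------------------------------------
if __name__ == "__main__":
    rp = RobotFileParser()
    rp.parse([
        "User-agent: *",
        "Allow: /private/public.html",
        "Disallow: /private/",
    ])
    # The "*" entry is stored as the default entry and is consulted only
    # after no named user-agent entry matches.
    print(rp.can_fetch("ExampleBot/1.0", "/index.html"))           # True
    print(rp.can_fetch("ExampleBot/1.0", "/private/public.html"))  # True
    print(rp.can_fetch("ExampleBot/1.0", "/private/secret.html"))  # False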